Python 爬虫:大众点评美食排名
程序员文章站
2022-05-02 22:15:05
...
import requests
from bs4 import BeautifulSoup
import re
def getHTMLText(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection cannot block forever.

    Returns:
        The decoded response text, or an empty string when the request
        fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into an exception so they take the "" path.
        r.raise_for_status()
        # Decode using the encoding detected from the body rather than the
        # one announced in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers receive "" on any request failure.
        # Only request errors are caught, so Ctrl-C and programming errors
        # still propagate.
        return ""
def getStockList(lst, stockURL, city_lst, infodict):
    """Scrape one listing page and record its names and addresses.

    The page is downloaded with ``getHTMLText`` and parsed with
    BeautifulSoup. The text of every ``<h4>`` element except the first two
    and the last two is treated as a name. For every ``<a>`` element whose
    ``href`` matches the region-link pattern, the first two
    whitespace-separated tokens of its text are joined into an address.

    Names and addresses are paired by position within this page: the k-th
    matching link belongs to the k-th name appended by this call.

    Args:
        lst: List that receives the names; it may already hold names from
            earlier pages.
        stockURL: URL of the listing page to scrape.
        city_lst: List that receives every address that could be built.
        infodict: Dict updated in place with ``name -> address`` entries.

    Returns:
        None. All results are delivered through the mutable arguments.
    """
    html = getHTMLText(stockURL)
    soup = BeautifulSoup(html, 'html.parser')
    headings = soup.find_all("h4")
    # Raw string: "\d" in a normal string literal is an invalid escape.
    link_pattern = re.compile(
        r"http://www.dianping.com/search/category/33/0/r\d{4}")
    links = soup.find_all("a", href=link_pattern)
    print(headings, links)  # debug dump of everything that matched

    # Remember where this page's names start. ``lst`` accumulates across
    # calls, so indexing from 0 would pair this page's addresses with
    # names collected from the first page.
    page_start = len(lst)
    for heading in headings[2:-2]:
        name = heading.text
        lst.append(name)
        print(name)

    for position, link in enumerate(links):
        tokens = link.text.split()
        if len(tokens) < 2:
            # Link text too short to build an address; skip it but keep
            # the positional alignment with the names.
            continue
        address = tokens[0] + tokens[1]
        city_lst.append(address)
        index = page_start + position
        if index >= len(lst):
            # More links than names on this page: nothing to pair with.
            continue
        infodict[lst[index]] = address
        print(address)
def main(output_file='E:/dzdpmspm.txt', last_page=50):
    """Crawl the listing pages and append the collected mapping to a file.

    Page 1 is fetched from the base URL. Pages 2 through ``last_page`` are
    fetched from the paginated URL. After each of those pages the whole
    name -> address mapping gathered so far is appended to ``output_file``
    as one line, and a progress percentage is printed.

    Args:
        output_file: Path of the text file to append to (UTF-8).
        last_page: Number of the final page to crawl, inclusive.

    Returns:
        None.
    """
    base_url = 'http://www.dianping.com/search/category/33/10/r3300'
    names = []
    addresses = []
    info = {}

    # The first page has no page suffix in its URL.
    getStockList(names, base_url, addresses, info)

    for n in range(2, last_page + 1):
        page_url = base_url + "p" + str(n) + "?aid=91959818%2C93071129"
        getStockList(names, page_url, addresses, info)
        # The file is reopened in append mode for every page, so each
        # snapshot is closed out before the next request starts.
        # NOTE: each line is the repr of the *cumulative* mapping, not
        # only the entries found on this page.
        with open(output_file, 'a', encoding='utf-8') as f:
            f.write(str(info.items()) + '\n')
            print("\r当前速度:{:.2f}%".format(n * 100 / last_page), end="")
# Start the crawl only when this file is executed as a script, so that
# importing the module does not trigger network requests or file writes.
if __name__ == "__main__":
    main()