BeautifulSoup获取股票信息
程序员文章站
2022-03-01 22:33:27
...
获取全量股票ID
获取股票明细加写表
循环调用
import requests
from bs4 import BeautifulSoup


def getAllstockId():
    """Fetch the stock-listing page and return every stock code on it.

    Scrapes https://hq.gucheng.com/gpdmylb.html; each ``section.stockTable``
    element holds anchors whose text looks like ``Name(code)``, and the code
    between the parentheses is extracted.

    Returns:
        list[str]: stock codes in page order; empty if no table is found.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = "https://hq.gucheng.com/gpdmylb.html"
    # Timeout so a stalled server cannot hang the whole crawl.
    page_info = requests.get(url, timeout=10)
    page_info.encoding = 'utf-8'
    soup = BeautifulSoup(page_info.text, 'html.parser')  # parsed document
    stock_ids = []
    for section in soup.find_all('section', class_='stockTable'):
        for anchor in section.find_all('a'):
            text = anchor.text
            # Anchor text is expected as "Name(code)"; skip malformed
            # anchors instead of raising IndexError on split()[1].
            if '(' in text and ')' in text:
                stock_ids.append(text.split('(')[1].split(')')[0])
    return stock_ids
获取股票明细加写表
import requests
from bs4 import BeautifulSoup


def getgaoguanInfo(params):
    """Scrape one stock's executive (高管) table and append rows to d:/a.txt.

    Fetches ``http://stock.jrj.com.cn/share,<params>,ggry.shtml`` (GBK
    encoded), walks every row of the ``table.tab1`` tables, and appends
    pipe-delimited lines of the form ``<page title>|<stock id>|<td>...``
    to ``d:/a.txt``. The 8th cell of each row is deliberately skipped,
    matching the original layout.

    Args:
        params: stock id string to interpolate into the URL.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = "http://stock.jrj.com.cn/share," + params + ",ggry.shtml"
    # Timeout so a stalled server cannot hang the whole crawl.
    page_info = requests.get(url, timeout=10)
    page_info.encoding = 'gbk'
    soup = BeautifulSoup(page_info.text, 'html.parser')  # parsed document
    title = soup.find("title")
    titlestr = title.text.split('(')[0]
    with open('d:/a.txt', 'a') as f:
        for table in soup.find_all('table', class_='tab1'):
            for row in table.find_all('tr'):
                # NOTE: the original reused the table loop variable as the
                # td counter, clobbering it; use distinct names instead.
                line = titlestr + '|' + params
                for col, cell in enumerate(row.find_all('td'), start=1):
                    if col != 8:  # column 8 is intentionally dropped
                        line = line + "|" + cell.text
                # Header/empty rows yield short lines; keep data rows only.
                if len(line) > 20:
                    f.write(line + '\n')
循环调用
from webcrawler.spilder import getgaoguanInfo
from webcrawler.getstockid import getAllstockId


def main():
    """Fetch every stock id, then scrape executive info for each one."""
    stock_ids = getAllstockId()
    for stock_id in stock_ids:
        print(stock_id)  # progress indicator for the long-running crawl
        getgaoguanInfo(stock_id)


if __name__ == "__main__":
    main()