股吧网站翻页爬取前十页
程序员文章站
2022-05-02 22:13:59
...
import requests
import os
class GuBa:
    """Download the first pages of an Eastmoney search result to disk.

    Instantiating the class runs the crawl immediately: ``GuBa(10)`` requests
    result pages 1 through 10 and stores each response body as
    ``guba/<page number>.html`` (UTF-8 encoded).
    """

    # Search URL with the keyword already percent-encoded (UTF-8 bytes of
    # "家电行业"); the 1-based page number is appended to it, e.g.
    #   ...&pageindex=1
    #   ...&pageindex=2
    BASE_URL = (
        "http://so.eastmoney.com/web/s"
        "?keyword=%E5%AE%B6%E7%94%B5%E8%A1%8C%E4%B8%9A&pageindex="
    )
    # Directory (relative to the current working directory) for saved pages.
    OUTPUT_DIR = "guba/"
    # Seconds to wait for the server before giving up on a request; without
    # a timeout a single stalled connection would block the crawl forever.
    TIMEOUT = 10

    def __init__(self, page):
        """Start the crawl for the first ``page`` result pages."""
        self.run(page)

    def run(self, page):
        """Fetch result pages ``1..page`` and write each one to a file.

        Args:
            page: Number of result pages to download. Zero or a negative
                value downloads nothing (the output directory is still
                created).

        Raises:
            requests.exceptions.RequestException: If a request fails or
                does not answer within ``TIMEOUT`` seconds.
            OSError: If the output directory or a page file cannot be
                written.
        """
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        for index in range(1, page + 1):
            pn = str(index)
            response = requests.get(self.BASE_URL + pn, timeout=self.TIMEOUT)
            with open(self.OUTPUT_DIR + pn + ".html", "w", encoding="utf-8") as f:
                f.write(response.text)
if __name__ == "__main__":
    # Script entry point: constructing GuBa starts the download right away.
    page_count = 10
    GuBa(page_count)
上一篇: 新浪新闻标题和网址爬取