Python 爬虫基础项目 -- 爬取百度贴吧前十页
程序员文章站
2022-05-02 22:13:41
...
在运行脚本的当前工作目录下创建一个名为 tieba 的空文件夹（爬取到的页面会保存在 ./tieba/<贴吧名>/ 中，例如 ./tieba/lol/1.html）
import requests
import os
# Base URL of the Baidu Tieba listing endpoint; requests appends the query
# string built from `params` below.
base_url = 'http://tieba.baidu.com/f?'
# Sent with every request so the client identifies itself as a desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
}
# Forum name passed as the 'kw' query parameter; another name such as '中国'
# can be substituted here.
kw = 'lol'
# Directory that receives the downloaded pages (one sub-folder per forum name).
filename = './tieba/' + kw
# makedirs also creates the parent './tieba' directory when it is missing, and
# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(filename, exist_ok=True)

# Download ten consecutive listing pages and store them as 1.html .. 10.html.
for i in range(10):
    params = {
        'kw': kw,
        'ie': 'utf-8',
        # Offset grows by 50 per iteration -- presumably Tieba lists 50
        # threads per page; verify against the site if the layout changes.
        'pn': str(i * 50),
    }
    # A timeout keeps a stalled connection from hanging the script forever;
    # requests raises requests.exceptions.Timeout when it is exceeded.
    response = requests.get(base_url, headers=headers, params=params, timeout=10)
    page_path = os.path.join(filename, '{}.html'.format(i + 1))
    with open(page_path, 'w', encoding='utf-8') as fp:
        fp.write(response.text)