requests_html爬取表情包
程序员文章站
2024-02-28 17:57:52
...
```python
# 进行爬取 https://fabiaoqing.com/biaoqing
import requests
import os
from requests_html import HTMLSession#必须使用session = HTMLSession()
# Running totals updated by src(): `a` counts image slots processed,
# `fail` counts the ones that could not be downloaded or saved.
fail = 0
a = 0

# Destination folder prefix used by save()/savegif(); the folder is created
# up front so that opening files inside it does not fail.
path = 'C:/表情包/'
os.makedirs('C:/表情包', exist_ok=True)

# requests_html session used to fetch and parse the listing pages.
session = HTMLSession()
def save(respone, name):
    """Write raw image bytes to ``<path><name>.jpg``.

    Args:
        respone: Image payload as bytes (the body of the HTTP response).
        name: Base file name without extension; callers pass the title
            scraped from the page.

    Raises:
        OSError: If the target file cannot be created or written.
    """
    # The name comes straight from scraped HTML. Characters that are illegal
    # in Windows file names, or that act as path separators, would make
    # open() fail or write outside the target folder, so replace them.
    safe_name = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in name)
    with open(path + safe_name + '.jpg', 'wb') as f:
        f.write(respone)
def savegif(respone, name):
    """Write raw image bytes to ``<path><name>.gif``.

    Args:
        respone: Image payload as bytes (the body of the HTTP response).
        name: Base file name without extension; callers pass the title
            scraped from the page.

    Raises:
        OSError: If the target file cannot be created or written.
    """
    # The name comes straight from scraped HTML. Characters that are illegal
    # in Windows file names, or that act as path separators, would make
    # open() fail or write outside the target folder, so replace them.
    safe_name = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in name)
    with open(path + safe_name + '.gif', 'wb') as f:
        f.write(respone)
def src(i):
    """Download the images listed on one index page of fabiaoqing.com.

    Fetches ``https://fabiaoqing.com/biaoqing/lists/page/<i>.html``, looks up
    image slots 1-45 with a CSS selector, and stores each image through
    save() (links ending in ``jpg``) or savegif() (everything else).
    Progress is printed after every slot.

    Side effects: increments the module-level counters ``a`` (slots
    processed) and ``fail`` (slots that could not be located, downloaded or
    saved).

    Args:
        i: Page number inserted into the listing URL.
    """
    global a, fail
    r = session.get('https://fabiaoqing.com/biaoqing/lists/page/' + str(i) + '.html')
    # A distinct loop variable keeps the page-number parameter `i` intact.
    for slot in range(1, 46):
        div = r.html.find(
            '#bqb > div.ui.segment.imghover > div:nth-child(' + str(slot) + ') > a > img',
            first=True,
        )
        try:
            # `div` is None when the selector matches nothing; the resulting
            # AttributeError is handled below as a failed slot.
            print(div.attrs['data-original'])  # image address
            print(div.attrs['title'])  # image title
            title = div.attrs['title']
            link = str(div.attrs['data-original'])
            print(link)
            # The timeout keeps a stalled server from hanging the crawl, and
            # raise_for_status() stops HTTP error pages being saved as images.
            connet = requests.get(link, timeout=30)
            connet.raise_for_status()
            if link[-3:] == 'jpg':
                save(connet.content, title)
            else:
                savegif(connet.content, title)
        except Exception:
            # Best effort per image: count the failure and move on.
            # `Exception` (not a bare except) lets Ctrl-C stop the crawl.
            print("没有定位到超链接")
            fail = fail + 1
        a = a + 1
        # The conversion is `%d`; the original `d%` made `%张` an invalid
        # format specifier, which raised ValueError.
        print('在下载第%d张' % a)  # number of images processed so far
        print('失败%d张' % fail)
# Crawl listing pages 0 through 200, one src() call per page.
for i in range(201):
    src(i)