欢迎您访问程序员文章站！本站旨在为大家提供、分享程序员计算机编程知识！
您现在的位置是: 首页

requests_html爬取表情包

程序员文章站 2024-02-28 17:57:52
...

```python
# 进行爬取 https://fabiaoqing.com/biaoqing
import requests
import os
from requests_html import HTMLSession#必须使用session = HTMLSession()
session = HTMLSession()
# Module-level state shared by save(), savegif() and src().
# `path` is the prefix every output file name is appended to, so it keeps
# its trailing slash.
path = 'C:/表情包/'
# Running counters that src() updates through `global`:
#   a    - number of image slots processed so far
#   fail - number of slots that could not be downloaded
a = fail = 0
# Create the output directory up front; exist_ok makes re-runs harmless.
os.makedirs(path.rstrip('/'), exist_ok=True)
def save(respone,name):
    """Write image bytes to ``<path><name>.jpg``.

    Args:
        respone: Raw bytes of the image (the caller passes the body of an
            HTTP response).
        name: File name without extension; appended to the module-level
            ``path`` prefix.
    """
    target = ''.join((path, name, '.jpg'))
    with open(target, 'wb') as out:
        out.write(respone)
def savegif(respone,name):
    """Write image bytes to ``<path><name>.gif``.

    Args:
        respone: Raw bytes of the image (the caller passes the body of an
            HTTP response).
        name: File name without extension; appended to the module-level
            ``path`` prefix.
    """
    target = ''.join((path, name, '.gif'))
    with open(target, 'wb') as out:
        out.write(respone)
def src(i):
    """Download the sticker images listed on page ``i`` of fabiaoqing.com.

    Fetches ``/biaoqing/lists/page/<i>.html`` through the module-level
    ``session``, probes child positions 1 through 45 of the image grid, and
    for each ``<img>`` found downloads the URL in its ``data-original``
    attribute. The image is stored via ``save`` when the URL ends in ``jpg``
    and via ``savegif`` otherwise, using the ``title`` attribute as the file
    name.

    Side effects: increments the module-level counter ``a`` once per probed
    position and ``fail`` once per position that could not be located,
    downloaded or written; prints progress to stdout.

    Args:
        i: Page number inserted into the listing URL.
    """
    # Declared once, up front, instead of in the middle of the loop body.
    global a, fail
    # Titles come from the website and are used as file names; replace the
    # characters Windows forbids in file names so open() does not fail.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    r = session.get('https://fabiaoqing.com/biaoqing/lists/page/' + str(i) + '.html')
    # `position` used to be named `i`, shadowing the page-number parameter.
    for position in range(1, 46):
        img = r.html.find(
            '#bqb > div.ui.segment.imghover > div:nth-child(' + str(position) + ') > a > img',
            first=True,
        )
        try:
            # `img` is None when the selector matches nothing; the attribute
            # access then raises AttributeError, handled below.
            link = str(img.attrs['data-original'])
            print(link)  # image URL
            title = img.attrs['title']
            print(title)
            # Bound the wait and reject HTTP error responses so a stalled
            # connection cannot hang the crawl and an error page is never
            # written to disk as an image.
            connet = requests.get(link, timeout=30)
            connet.raise_for_status()
            file_name = title.translate(unsafe_chars)
            # NOTE: any non-jpg URL (png, webp, ...) is stored with a .gif
            # extension because only these two helpers exist.
            if link.endswith('jpg'):
                save(connet.content, file_name)
            else:
                savegif(connet.content, file_name)
        except (AttributeError, KeyError):
            # No <img> at this position, or a required attribute is missing.
            print("没有定位到超链接")
            fail = fail + 1
        except (requests.RequestException, OSError) as err:
            # Network failure, bad HTTP status, or the file could not be
            # written; keep going with the next image (best effort).
            print("下载失败:", err)
            fail = fail + 1
        a = a + 1
        # The original had 'd%' instead of '%d', which raised ValueError
        # (unsupported format character) on the very first image.
        print('在下载第%d张' % a)  # running count of processed images
    print('失败%d张' % fail)

# Script entry point: walk listing pages 0 through 200 in order and download
# the images on each one.
# NOTE(review): the first request is for page 0 - confirm the site actually
# serves a page with that number.
for page_number in range(201):
    src(page_number)



相关标签: 爬取表情包