Python 入门爬虫:爬取女生头像(无需反爬)
程序员文章站
2022-05-02 17:37:46
...
环境:Python 3.8
IDE:PyCharm
# coding: utf-8
import os
import re
import time
import uuid

import requests
from lxml import etree
# Request headers that present a desktop Chrome User-Agent to the server.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36'
    ),
}
class Spider:
    """Download avatar images linked from woyaogexing.com listing pages.

    ``imgmain`` walks a range of numbered listing pages and hands each page
    URL to ``imgrun``, which extracts the image URLs with an XPath query and
    saves every image under ``imgpath`` with a UUID-based file name.
    """

    # Directory the images are written to. A raw string is used because the
    # previous literal "A:\img" depended on the invalid escape sequence "\i".
    imgpath = r"A:\img"
    # Seconds to wait for a single HTTP response before giving up, so a
    # stalled connection cannot hang the crawl forever.
    timeout = 10

    def imgrun(self, url, count):
        """Fetch one listing page and save every image found on it.

        Args:
            url: Address of the listing page to scrape.
            count: Label of the current round; only used in progress output.

        A failed page request or a failed image download is reported on
        stdout and skipped instead of aborting the whole crawl.
        """
        self.url = url
        self.count = count
        print('url========' + url)

        # Make sure the target directory exists before the first write.
        os.makedirs(self.imgpath, exist_ok=True)

        try:
            res = requests.get(url=url, headers=header, timeout=self.timeout)
        except requests.RequestException as exc:
            print('page request failed: ' + str(exc))
            return
        # Print the HTTP status code returned by the site.
        print(res.status_code)
        if not res.ok or not res.content:
            return

        # Parse the raw bytes; lxml takes the encoding from the document.
        html = etree.HTML(res.content)
        if html is None:
            return

        # Select the thumbnail sources of the listing entries.
        items = html.xpath('//*[@id="main"]/div[3]/div[1]/div[2]/div/a[1]/img/@src')
        for num, src in enumerate(items, start=1):
            # The original code always prefixed "https:" to the src value;
            # leave a src alone when it already carries a scheme.
            if src.startswith(('http://', 'https://')):
                imgurl = src
            else:
                imgurl = 'https:' + src
            print('开始第' + str(count) + '次 的第' + str(num) + '张图片')
            print(imgurl)

            try:
                result = requests.get(imgurl, headers=header, timeout=self.timeout)
                result.raise_for_status()
            except requests.RequestException as exc:
                print('download failed: ' + str(exc))
                continue

            target = os.path.join(self.imgpath, str(uuid.uuid1()) + '.jpeg')
            with open(target, 'wb') as f:
                f.write(result.content)
            print('download ok')
            # Pause between downloads to avoid hammering the server.
            time.sleep(1)

    def imgmain(self, start=103, stop=110):
        """Scrape the listing pages ``index_<start>`` .. ``index_<stop - 1>``.

        Args:
            start: First page index to request (inclusive).
            stop: Page index to stop at (exclusive).

        The round label printed and passed to ``imgrun`` is the page index
        minus one.
        """
        for i in range(start, stop):
            print("开始第" + str(i - 1) + "次")
            url = "https://www.woyaogexing.com/touxiang/nv/index_{}.html".format(i)
            self.imgrun(url, i - 1)
# Script entry: create the spider and crawl the default range of listing pages.
sp = Spider()
sp.imgmain()
上一篇: python :通过爬虫爬取数据(1)
下一篇: KMP算法python实现