欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

Python:爬虫爬取妹子网图片-文件爬取

程序员文章站 2022-05-04 14:58:41
...

import requests

 

import os

 

from lxml import etree

# Request headers sent with every HTTP call made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
    'Referer': 'http://www.mzitu.com',
}

# Listing-page URLs to crawl: pages 2 through 10 inclusive.
urls = list(map("https://www.mzitu.com/page/{}".format, range(2, 11)))

# Output directory for downloaded images; change it to your own location.
path = 'G:/photo/'

 

 

# 获取图片并写入本地文件

 

def get_girlphoto(url, timeout=30):
    """Download the images listed on one page and save them under ``path``.

    The page at ``url`` is fetched with the module-level ``headers``, the
    ``data-original`` attribute of every ``<img>`` matched by the XPath
    below is collected, and each image is written in binary mode into the
    module-level ``path`` directory.

    Args:
        url: Address of the listing page to scrape.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.exceptions.RequestException: If a request cannot be
            completed (for example a connection error or a timeout).
    """
    # Imported locally so the function needs no extra file-level import.
    from urllib.parse import urlsplit

    # Without a timeout a stalled connection would block the crawl forever.
    page = requests.get(url, headers=headers, timeout=timeout)
    if not page.ok:
        print('Skipping page {}: HTTP {}'.format(url, page.status_code))
        return

    selector = etree.HTML(page.text)

    # Collect the image URLs found on the listing page.
    girlphoto_urls = selector.xpath('//div/ul/li/a/img/@data-original')
    print(girlphoto_urls)

    # Make sure the target directory exists before the first write.
    os.makedirs(path, exist_ok=True)

    # Fetch each image and write it to disk; image data must be written as bytes.
    for item in girlphoto_urls:
        image = requests.get(item, headers=headers, timeout=timeout)
        if not image.ok:
            # Do not store an error response under an image file name.
            print('Skipping image {}: HTTP {}'.format(item, image.status_code))
            continue

        # Use the last component of the URL path as the file name: a
        # fixed-length suffix of the URL can collide between images or
        # contain a '/' that breaks the output path.
        filename = os.path.basename(urlsplit(item).path)
        if not filename:
            print('Skipping image {}: no file name in URL'.format(item))
            continue

        # The with-statement closes the file; no explicit close() is needed.
        with open(os.path.join(path, filename), 'wb') as f:
            f.write(image.content)

 

 

# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
    # Scrape each listing page in order.
    for page_url in urls:
        get_girlphoto(page_url)