欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

python爬虫相关

程序员文章站 2024-02-22 15:12:28
...
import requests

# Download a single image and save it to disk.
# url: address of the remote image; '12.jpg': name of the local copy.
url='https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1491640885248&di=119159673e19737701726aa75b667a3b&imgtype=0&src=http%3A%2F%2Fdesk.fd.zol-img.com.cn%2Ft_s960x600c5%2Fg2%2FM00%2F0D%2F05%2FChMlWVW3ScSIEd9SAAGFKpo0iqgAAHvoQLIkAwAAYVC501.jpg'
# Without a timeout, requests waits indefinitely for a server that never answers.
html = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx response instead of saving an error page as a ".jpg".
html.raise_for_status()
# Binary mode: the payload is raw image bytes, not text.
with open('12.jpg', 'wb') as f:
    f.write(html.content)
# Simple image scraper: saves every <img> found on the page as 0.jpg, 1.jpg, ...
# -*- coding: utf-8 -*-
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

page_url = 'http://www.mmjpg.com/'
# Without a timeout, requests waits indefinitely for a server that never answers.
html = requests.get(page_url, timeout=10).content
soup = BeautifulSoup(html, "html.parser")
# Only <img> tags that actually carry a src attribute are selected.
listUrls = soup.select('img[src]')
for k, img in enumerate(listUrls):
    src = img.get('src')
    print(src)
    # Resolve relative and protocol-relative src values against the page URL;
    # absolute URLs pass through urljoin unchanged.
    image = requests.get(urljoin(page_url, src), timeout=10)
    # The running index k becomes the local file name.
    with open(str(k) + '.jpg', "wb") as f:
        f.write(image.content)


#! -*-coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup

# Scrape the Douban Movie Top 250 and save the results to a text file (douban.txt)
def get_one_page(url):
    """Fetch *url* with a browser-like User-Agent and return the body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body decoded to ``str`` (``response.text``).

    Raises:
        requests.exceptions.RequestException: On connection failure, or when
            the server does not respond within the 10-second timeout.
    """
    # A desktop-browser User-Agent is sent instead of the requests default.
    head = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36'}
    # Without a timeout, requests waits indefinitely for an unresponsive server.
    response = requests.get(url, headers=head, timeout=10)
    return response.text


def parse_one_page(html):
    """Extract rank/title pairs from one page and append them to douban.txt.

    Every ``<em>`` element is taken as a rank and every ``<img>`` element's
    ``alt`` attribute as a title. The two lists are paired positionally with
    ``zip``, so surplus elements of the longer list are ignored. Each pair is
    appended to ``douban.txt`` (UTF-8) as one tab-separated line.

    Args:
        html: Markup of a single page, as a string.
    """
    soup = BeautifulSoup(html, "html.parser")
    ranks = soup.find_all('em')
    titles = soup.find_all('img')
    # get_text() always returns a str, whereas .string is None for an element
    # with several children; a missing alt attribute falls back to ''. Either
    # None would raise TypeError during concatenation.
    lines = [
        rank.get_text() + '\t' + (title.get('alt') or '') + '\n'
        for rank, title in zip(ranks, titles)
    ]
    # Nothing matched: leave the output file untouched.
    if not lines:
        return
    # Open the file once per page rather than once per row; the with-block
    # closes it, so no explicit close() is needed.
    with open('douban.txt', "a", encoding='utf-8') as f:
        f.writelines(lines)


if __name__ == '__main__':
    # Ten result pages of 25 entries each: start offsets 0, 25, ..., 225.
    for start in range(0, 250, 25):
        page_url = 'https://movie.douban.com/top250?start=' + str(start) + '&filter='
        # Download the page, then append its rank/title rows to the output file.
        parse_one_page(get_one_page(page_url))

# Parse a JSON response to print a minimal weather report
import requests
import json

url=r'http://wthrcdn.etouch.cn/weather_mini?citykey=101210101'
# Without a timeout, requests waits indefinitely for an unresponsive server.
response = requests.get(url, timeout=10)
# Surface HTTP errors here rather than as a confusing JSON decode failure below.
response.raise_for_status()
jsonStr = response.text

data = json.loads(jsonStr)
# The report fields are nested under the top-level 'data' key.
weather = data['data']
print(data)
print("city:", weather['city'])
# NOTE(review): 'wendu' is presumably the temperature field (pinyin) — confirm against the API.
print("wendu:", weather['wendu'])

转载于:https://www.jianshu.com/p/b4b2c82a837f