python爬虫相关
程序员文章站
2024-02-22 15:12:28
...
import requests
# url: address of the image to download; 12.jpg: name of the local file it is saved as.
url = 'https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1491640885248&di=119159673e19737701726aa75b667a3b&imgtype=0&src=http%3A%2F%2Fdesk.fd.zol-img.com.cn%2Ft_s960x600c5%2Fg2%2FM00%2F0D%2F05%2FChMlWVW3ScSIEd9SAAGFKpo0iqgAAHvoQLIkAwAAYVC501.jpg'
# A timeout keeps the script from blocking forever on an unresponsive server.
response = requests.get(url, timeout=10)
# Fail loudly rather than saving an HTTP error page under an image file name.
response.raise_for_status()
with open('12.jpg', 'wb') as f:
    f.write(response.content)
#简单抓取图片,保存为0.jpg,1.jpg类似的格式
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Fetch the page, then download every <img> that carries a src attribute and
# store the images as 0.jpg, 1.jpg, ... in the current directory.
page_url = 'http://www.mmjpg.com/'
html = requests.get(page_url, timeout=10).content
soup = BeautifulSoup(html, "html.parser")
for k, img in enumerate(soup.select('img[src]')):
    # src may be relative or protocol-relative; resolve it against the page
    # URL so requests always receives an absolute address.
    src = urljoin(page_url, img.get('src'))
    print(src)
    with open(str(k) + '.jpg', "wb") as f:
        f.write(requests.get(src, timeout=10).content)
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
# Scrape the Douban Movie Top 250 and save the results to a text file.
def get_one_page(url):
    """Fetch ``url`` and return the response body decoded as text.

    The request is sent with a desktop Chrome User-Agent header
    (presumably because the site rejects the default client string --
    TODO confirm).

    Args:
        url: Address of the page to download.

    Returns:
        The body of the response as a ``str``.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            when the server does not answer within the timeout.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36'}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, headers=headers, timeout=10)
    return response.text
def parse_one_page(html):
    """Append one ``rank<TAB>title`` line per entry in ``html`` to douban.txt.

    Every ``<em>`` element is paired positionally with every ``<img>``
    element; the text of the former and the ``alt`` attribute of the latter
    form one output line. Pairs where either value is missing are skipped.

    Args:
        html: Markup of one result page.

    Returns:
        None. Lines are appended to ``douban.txt`` (UTF-8) in the current
        directory; the file is not touched when there is nothing to write.
    """
    soup = BeautifulSoup(html, "html.parser")
    ranks = soup.find_all('em')
    titles = soup.find_all('img')

    lines = []
    for rank, title in zip(ranks, titles):
        rank_text = rank.string
        title_text = title.get('alt')
        # .string is None when the tag has no single text child, and .get()
        # returns None when the attribute is absent; concatenating either
        # would raise TypeError, so such pairs are skipped.
        if rank_text is None or title_text is None:
            continue
        lines.append(rank_text + '\t' + title_text + '\n')

    # Open the file once per page instead of once per row; the with-block
    # closes it, so no explicit close() is needed.
    if lines:
        with open('douban.txt', 'a', encoding='utf-8') as f:
            f.writelines(lines)
if __name__ == '__main__':
    # Ten page URLs: the ``start`` query parameter advances by 25 per page.
    # NOTE: parse_one_page appends to douban.txt, so running the script
    # again adds the rows a second time.
    for i in range(10):
        url = 'https://movie.douban.com/top250?start=' + str(i * 25) + '&filter='
        html = get_one_page(url)
        parse_one_page(html)
#解析json格式,实现天气预报
import requests
import json
# Request the weather data for one city key and print the city name and the
# temperature ("wendu") found under the top-level "data" object.
url = r'http://wthrcdn.etouch.cn/weather_mini?citykey=101210101'
# A timeout keeps the script from blocking forever on an unresponsive server.
response = requests.get(url, timeout=10)
# Stop on an HTTP error status instead of trying to parse an error page.
response.raise_for_status()
data = json.loads(response.text)
# Print the raw payload first so it is still visible if the lookups below fail.
print(data)
weather = data['data']
print("city:", weather['city'])
print("wendu:", weather['wendu'])
转载于:https://www.jianshu.com/p/b4b2c82a837f
上一篇: Camel 2.4.0 发布了