Python爬虫--requests库基本用法
程序员文章站
2022-06-09 22:43:36
requests是python实现的简单易用的HTTP库。下面介绍其一些基本的用法。1 、基本用法import requestsresponse = requests.get('http://www.baidu.com')#print(type(response)) # 查看类型#print(dir(response)) # 查看response下的方法print(response.status_code) # 打印状态码print(response.url) # 打印请求URL...
requests是python实现的简单易用的HTTP库。下面介绍其一些基本的用法。
1 、基本用法
import requests

# Issue a plain GET request for the Baidu home page.
resp = requests.get('http://www.baidu.com')

# Handy while exploring the returned object:
#   print(type(resp))  # the type of the response object
#   print(dir(resp))   # attributes and methods available on it

print(resp.status_code)  # HTTP status code
print(resp.url)          # URL of the request
print(resp.headers)      # header information
print(resp.cookies)      # cookie information
print(resp.text)         # page source as text
print(resp.content)      # page source as a byte string
2 、各种请求方式
import requests

# GET: fetch the resource and print the body that comes back.
resp = requests.get('http://httpbin.org/get')
print(resp.text)

# The other HTTP verbs each have a helper named after the verb.
# Their responses are discarded here.
requests.post('http://httpbin.org/post')      # POST
requests.put('http://httpbin.org/put')        # PUT
requests.delete('http://httpbin.org/delete')  # DELETE
requests.head('http://httpbin.org/get')       # HEAD
requests.options('http://httpbin.org/get')    # OPTIONS
3、 带参数的get请求
方式一:直接将参数写在URL中
# Option 1: write the query parameters directly into the URL.
import requests

url = 'http://httpbin.org/get?name=Tony&age=23'
resp = requests.get(url)
print(resp.text)
结果如下:
方式二:先将参数写到dict字典中,然后通过请求的params参数传入
import requests

# Option 2: keep the query parameters in a dict and pass it through the
# `params` keyword; requests encodes the dict into the URL's query string.
query = {'name': 'Tony', 'age': 23}
resp = requests.get('http://httpbin.org/get', params=query)
print(resp.text)
结果同上。
例2:
import requests

url = 'http://httpbin.org/get'

# A list value is expanded into one repeated key per item.
list_query = {'k1': 'v1', 'k2': [1, 2, 3]}
# A key whose value is None is left out of the query string.
none_query = {'k1': 'v1', 'k2': None}

r = requests.get(url, params=list_query)
r2 = requests.get(url, params=none_query)

print(r.url)   # prints: http://httpbin.org/get?k1=v1&k2=1&k2=2&k2=3
print(r2.url)  # prints: http://httpbin.org/get?k1=v1
4、解析json
import requests

# NOTE(review): this replaces https://github.com/timeline.json, which GitHub
# appears to have retired, so r.json() would fail there - confirm.
# The httpbin endpoint responds with a JSON body that can be decoded.
r = requests.get('http://httpbin.org/get')
print(type(r.json()))  # r.json() decodes the JSON body into Python objects
print(r.text)          # the same body as plain text
5、二进制文件
# Handle an image as binary data: download the bytes from the image URL,
# turn them into an image object, and save it to disk.
from io import BytesIO

import requests
from PIL import Image

resp = requests.get('https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1607513130911&di=3771dc5570d59ee9a87fa0cbce37c313&imgtype=0&src=http%3A%2F%2Fa4.att.hudong.com%2F22%2F59%2F19300001325156131228593878903.jpg')
# Wrap the raw bytes in an in-memory file object so PIL can open them.
raw = BytesIO(resp.content)
picture = Image.open(raw)
picture.save('pic.jpg')
结果在文件目录下生成对应的图片。
6、原始数据处理
以流数据存储一张照片
# Save a photo by streaming the response body to disk.
import requests

# stream=True defers downloading the body until it is iterated below.
r = requests.get('https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1607513518482&di=28c7f32ac01948bfca8da75030b7ca8c&imgtype=0&src=http%3A%2F%2Fa2.att.hudong.com%2F27%2F81%2F01200000194677136358818023076.jpg', stream=True)
# Write-only binary mode is enough: the file is never read back here.
with open('pic.jpg', 'wb') as f:
    # Copy the body to the file in chunks of up to 1024 bytes.
    for chunk in r.iter_content(1024):
        f.write(chunk)
7、POST请求提交表单
import requests

# A dict passed as `data` is submitted as form fields.
credentials = {'username': 'user', 'password': 'pass'}
resp = requests.post('http://httpbin.org/post', data=credentials)
print(resp.text)  # the fields are echoed back under "form"
输出结果:
{
"args": {},
"data": "",
"files": {},
"form": {
"password": "pass",
"username": "user"
},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Content-Length": "27",
"Content-Type": "application/x-www-form-urlencoded",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.22.0",
"X-Amzn-Trace-Id": "Root=1-5fd08ff0-0015ad7c77cea92a027d4294"
},
"json": null,
"origin": "124.160.154.21",
"url": "http://httpbin.org/post"
}
import json

import requests

# Defined here so the snippet runs on its own.
form = {'username': 'user', 'password': 'pass'}
# A string passed as `data` is sent as the raw request body, so the
# JSON text produced by json.dumps() is what the server receives.
r = requests.post('http://httpbin.org/post', data=json.dumps(form))
print(r.text)  # the body is echoed back under "data" (parsed copy under "json")
输出结果:
{
"args": {},
"data": "{\"username\": \"user\", \"password\": \"pass\"}",
"files": {},
"form": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Content-Length": "40",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.22.0",
"X-Amzn-Trace-Id": "Root=1-5fd09067-18566b24664f50eb30792aa1"
},
"json": {
"password": "pass",
"username": "user"
},
"origin": "124.160.154.21",
"url": "http://httpbin.org/post"
}
8、获取cookie
import requests

url = 'http://www.baidu.com'
r = requests.get(url)
# r.cookies is a dict-like cookie jar holding the cookies sent by the server.
cookies = r.cookies
# Print each cookie as a name/value pair.
for k, v in cookies.items():
    print(k, v)
输出结果:
BDORZ 27315
本文地址:https://blog.csdn.net/LOVEYSUXIN/article/details/110930213