urllib的handler使用以及具名元组namedtuple使用
程序员文章站
2022-05-04 07:50:22
...
简单用法:
# Simple usage
def get(url):
    """Open *url* with a handler-based opener and return the response.

    The response object is returned unread; the caller is responsible for
    calling ``read()`` on it and closing it.

    :param url: URL to open.
    :return: the response object produced by ``opener.open(url)``.
    """
    # Imported here so this snippet is runnable by itself: nothing above it
    # brings these names into scope.
    from urllib.request import HTTPHandler, build_opener

    opener = build_opener(HTTPHandler())
    return opener.open(url)
if __name__ == '__main__':
    # Fetch the Baidu home page and print its body decoded as UTF-8.
    page = get("http://www.baidu.com")
    html = page.read().decode('utf-8')
    print(html)
使用具名元组:
#!/usr/bin/env python
# coding=utf-8
# urllib的handler使用
# 具名元组参考:https://www.runoob.com/note/25726
# namedtuple:具名元组
from collections import namedtuple
from urllib.request import HTTPHandler,build_opener
import ssl
# SECURITY NOTE: this rebinds the ssl module's private default-HTTPS-context
# factory to its "unverified" variant for the whole process, not just for
# requests made by this file. Both names are private (underscore-prefixed)
# APIs. Presumably done so the demo URLs open despite certificate problems;
# do not copy this into production code.
ssl._create_default_https_context = ssl._create_unverified_context
# Immutable, requests-like record describing one fetched response.
Response = namedtuple('Response',
                      field_names=['headers', 'code', 'text', 'body', 'encoding'])


def get(url):
    """Fetch *url* and return its contents as a ``Response`` namedtuple.

    :param url: URL to open.
    :return: ``Response`` with these fields:

        * ``headers``  - dict of response header names to values
        * ``code``     - status code reported by the response object
        * ``body``     - raw payload as ``bytes``
        * ``text``     - ``body`` decoded with ``encoding``
        * ``encoding`` - codec name actually used to produce ``text``

    The charset is taken from the Content-Type header (matched
    case-insensitively); ``'utf-8'`` is used when the header is missing,
    declares no charset, or names a codec Python does not know. Bytes that
    are invalid for the chosen codec are replaced rather than raising, so
    ``text`` is always available while ``body`` keeps the exact bytes.
    """
    opener = build_opener(HTTPHandler())

    # The context manager closes the response (and its connection) as soon
    # as the payload has been read, even if reading fails.
    with opener.open(url) as resp:
        # ``resp.headers`` is an email.message.Message; unlike the deprecated
        # getheaders() it is also present on non-HTTP responses.
        message = resp.headers
        headers = dict(message.items())
        code = resp.code
        body = resp.read()

    # get_content_charset() parses the Content-Type parameters properly and
    # returns None when no charset is declared.
    encoding = message.get_content_charset() or 'utf-8'
    try:
        text = body.decode(encoding, errors='replace')
    except LookupError:
        # The server announced a codec name Python does not recognise.
        encoding = 'utf-8'
        text = body.decode(encoding, errors='replace')

    return Response(headers=headers, code=code, text=text, body=body,
                    encoding=encoding)
if __name__ == '__main__':
    # Alternative target: "https://www.baidu.com"
    url = "https://www.12306.cn/mormhweb/"
    resp: Response = get(url)

    # Show each piece of the response in turn.
    for field in (resp.text, resp.body, resp.code, resp.headers):
        print(field)

    # A namedtuple is immutable: assigning ``resp.code = 300`` would raise,
    # so build a modified copy with _replace() instead.
    resp = resp._replace(code=300)
    print(resp.code)

    # Tuple of all field names.
    print(resp._fields)

    # Convert the Response to a dict; note the leading underscore in _asdict.
    print(resp._asdict())