模拟浏览器爬取
程序员文章站
2022-05-09 21:17:03
...
import random
import urllib.request
# Page URL requested by the module-level script code further down.
url = 'http://www.baidu.com'
def method_name():
    """Fetch the Baidu home page with a desktop-browser User-Agent and print it.

    Builds a request for ``http://www.baidu.com`` carrying a desktop Chrome
    ``User-Agent`` header, opens it, decodes the response body as UTF-8 and
    prints the resulting text. Returns ``None``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    url = 'http://www.baidu.com'
    # The key must be spelled exactly "User-Agent": a misspelled key is sent
    # as an unrelated header and urllib keeps its default User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
    }
    # Build the request object that carries the custom headers.
    req = urllib.request.Request(url, headers=headers)
    # Send the request; the context manager closes the response afterwards.
    with urllib.request.urlopen(req) as response:
        data = response.read().decode('utf-8')
    print(data)
# method_name()
# Pool of mobile-browser User-Agent strings; one is chosen at random per run.
agnetsList = [
    'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Mobile Safari/537.36',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1',
    'Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Mobile Safari/537.36',
]
agentStr = random.choice(agnetsList)
req = urllib.request.Request(url)
# Add the User-Agent header to the request. Pass the chosen string itself:
# quoting the variable name would send the literal text 'agentStr' instead.
req.add_header('User-Agent', agentStr)
# Send the request; the context manager closes the response once it is read.
with urllib.request.urlopen(req) as response:
    print(response.read().decode())