欢迎您访问程序员文章站！本站旨在为大家提供、分享程序员计算机编程知识！
您现在的位置是: 首页

模拟浏览器爬取

程序员文章站 2022-05-09 21:17:03
...
import random
import urllib.request

# Target URL for the module-level request built further down in this file.
# (method_name() defines its own local copy and does not read this one.)
url = 'http://www.baidu.com'
def method_name():
    """Fetch the Baidu home page while posing as a desktop browser and print it.

    Builds a request for ``http://www.baidu.com`` that carries a Chrome
    ``User-Agent`` header, reads the whole response body, decodes it as
    UTF-8 and prints the resulting text to stdout.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` when the request
            cannot be completed.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    url = 'http://www.baidu.com'
    # The header name must be spelled exactly 'User-Agent'; a misspelled key
    # is sent as an unrelated header and urllib keeps its default agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'}
    # Build the request object with the custom headers attached.
    req = urllib.request.Request(url, headers=headers)
    # Send the request; the context manager closes the connection afterwards.
    with urllib.request.urlopen(req) as response:
        data = response.read().decode('utf-8')
    print(data)


# method_name()

# Pool of mobile-browser User-Agent strings; one is picked at random below.
agnetsList = [
    'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Mobile Safari/537.36',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1',
    'Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Mobile Safari/537.36',
]
agentStr = random.choice(agnetsList)
req = urllib.request.Request(url)
# Add the randomly chosen User-Agent to the request headers. Pass the
# variable itself -- quoting it would send the literal text 'agentStr'.
req.add_header('User-Agent', agentStr)
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(req) as response:
    print(response.read().decode())