python:爬虫之Post请求以及动态Ajax数据的爬取(3)
程序员文章站
2022-05-05 15:26:13
...
#爬虫的post方式
作用:将参数打包后随请求提交给服务器
import json
import ssl
import urllib.parse  # packs request parameters
import urllib.request
# Endpoint that receives the submitted form.
url = "http://www.sunck.wang:8085/form"
# Form fields sent in the request body.
data = {
    "username": "sunck",
    "passwd": "123",
}
# urlencode() turns the dict into a "key=value&key=value" string;
# encode() converts it to bytes, which is what a request body must be.
postdata = urllib.parse.urlencode(data).encode("utf-8")
# Supplying data= makes urllib send this request as a POST.
req = urllib.request.Request(url, data=postdata)
# Send a browser-like User-Agent header along with the request.
req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0")
# The with-block closes the HTTP response (and its socket) once it is read.
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))
#抓取动态页面
def AjaxDemo(url):
    """Fetch *url* and return its response body parsed as JSON.

    The request is sent with a browser-like User-Agent header and with TLS
    certificate verification disabled.

    Args:
        url: Absolute URL (including scheme) of an endpoint returning JSON.

    Returns:
        The decoded JSON value (whatever ``json.loads`` yields for the body).

    Raises:
        ValueError: If *url* has no scheme.
        urllib.error.URLError: If the request fails.
        json.JSONDecodeError: If the body is not valid JSON.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    }
    req = urllib.request.Request(url, headers=headers)
    # NOTE(security): this context skips certificate verification, so the
    # connection is open to man-in-the-middle attacks. Keep only for demos.
    context = ssl._create_unverified_context()
    # context must be passed by keyword: the second positional parameter of
    # urlopen() is `data`, so a positional context would be treated as a body.
    with urllib.request.urlopen(req, context=context) as response:
        json_str = response.read().decode("utf-8")
    return json.loads(json_str)
# Placeholder standing in for a real Ajax endpoint. urllib requires an
# explicit scheme, and non-ASCII path segments must be percent-encoded
# before the request is sent.
url = "https://movie.douban.com/" + urllib.parse.quote("动态") + "/"
print(AjaxDemo(url))