使用 requests 的 session 机制模拟登录知乎
程序员文章站
2024-03-17 21:34:58
...
开发环境
- Python版本: python3.6
- Python库: requests, pillow, re
- 浏览器: Google Chrome 62.0.3202.94
- 调试工具:pycharm 2017.1.3
- 电脑系统:Windows 10 64位系统
模拟登陆代码
import requests
try:
import cookielib
except:
import http.cookiejar as cookielib
import re
# One shared HTTP session for the whole script. Its cookie jar is backed by
# the file "cookies.txt" so cookies saved by an earlier run can be reused.
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename="cookies.txt")
try:
    session.cookies.load(ignore_discard=True)
except (IOError, OSError):
    # A missing, unreadable or malformed cookie file is not fatal: continue
    # with an empty jar. (LoadError derives from IOError/OSError, so it is
    # covered here; unrelated errors are no longer swallowed.)
    print ("cookie未能加载")

# Headers sent with every request made by the functions below.
headers = {
    'Host': "www.zhihu.com",
    'Referer': "https://www.zhihu.com/signin?next=/",
    'user-agent': "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Mobile Safari/537.36"
}
def is_login():
    """Report whether the shared session is currently logged in.

    Requests the inbox page without following redirects and treats an
    HTTP 200 status as "logged in"; any other status code is reported as
    "not logged in".

    Returns:
        True if the inbox request returned status 200, otherwise False.
    """
    inbox_url = 'https://www.zhihu.com/inbox'
    reply = session.get(inbox_url, headers=headers, allow_redirects=False)
    return reply.status_code == 200
def get_xsrf():
    """Fetch the sign-in page and extract its ``_xsrf`` token.

    Returns:
        The value of the ``name="_xsrf"`` form field found in the page,
        or an empty string when the page contains no such field.
    """
    response = session.get("https://www.zhihu.com/signin?next=/#signin", headers=headers)
    # re.search scans the whole document. The previous re.match('.*...')
    # could only succeed when the field was on the first line, because
    # '.' does not match newlines and re.match is anchored at position 0.
    match_obj = re.search(r'name="_xsrf" value="(.*?)"', response.text)
    if match_obj:
        return match_obj.group(1)
    return ""
def get_index():
    """Download the Zhihu home page and save it to ``index_page.html``.

    The response text is written UTF-8 encoded in binary mode, then
    "ok" is printed.
    """
    home_page = session.get("https://www.zhihu.com", headers=headers)
    with open("index_page.html", "wb") as out_file:
        out_file.write(home_page.text.encode("utf-8"))
    print ("ok")
def get_captcha():
    """Download a login captcha, display it, and return the typed answer.

    The image is fetched through the shared session (so it belongs to the
    same cookies as the later login request), saved as ``captcha.jpg`` in
    the working directory, and opened with Pillow's default viewer. The
    user is then prompted on stdin for the captcha text.

    Returns:
        The string the user typed at the prompt.
    """
    import time
    from PIL import Image

    # Millisecond timestamp used as the "r" query parameter of the URL.
    timestamp_ms = str(int(time.time() * 1000))
    captcha_url = "https://www.zhihu.com/captcha.gif?r={0}&type=login".format(timestamp_ms)
    response = session.get(captcha_url, headers=headers)
    # The with-statement closes the file; no explicit close() is needed.
    with open("captcha.jpg", "wb") as f:
        f.write(response.content)

    try:
        with Image.open('captcha.jpg') as im:
            im.show()
    except Exception:
        # Displaying the image is best-effort (e.g. no viewer available or
        # the download was not a valid image). The file is still on disk,
        # so tell the user instead of failing silently.
        print ("验证码图片无法显示,请手动打开 captcha.jpg")
    captcha = input("输入验证码\n>")
    return captcha
def zhihu_login(account, password):
    """Log in to Zhihu with a phone number or an e-mail address.

    The account type decides which endpoint and form fields are used:
    an 11-digit number starting with ``1`` is posted to the phone-number
    endpoint (including a captcha answer obtained from ``get_captcha``);
    a string containing ``@`` is posted to the e-mail endpoint. After the
    request, the session cookies are saved to the cookie jar's file.

    Args:
        account: Phone number or e-mail address to log in with.
        password: Password for that account.

    Returns:
        The response object returned by ``session.post``.

    Raises:
        ValueError: If ``account`` is neither an 11-digit phone number
            starting with ``1`` nor a string containing ``@``.
    """
    # Raw string avoids the invalid "\d" escape; "$" rejects longer inputs.
    if re.match(r"^1\d{10}$", account):
        print ("手机号码登录")
        post_url = "https://www.zhihu.com/login/phone_num"
        post_data = {
            "_xsrf": get_xsrf(),
            "phone_num": account,
            "password": password,
            "captcha": get_captcha(),
        }
    elif "@" in account:
        # An "@" in the account is taken to mean an e-mail address.
        print("邮箱方式登录")
        post_url = "https://www.zhihu.com/login/email"
        post_data = {
            "_xsrf": get_xsrf(),
            "email": account,
            "password": password,
        }
    else:
        # Previously this path fell through to the POST with post_url and
        # post_data unbound, crashing with UnboundLocalError.
        raise ValueError(
            "account must be an 11-digit phone number or an e-mail address"
        )

    response = session.post(post_url, data=post_data, headers=headers)
    session.cookies.save()
    return response
# Script entry: attempt a login. The credentials shown here are masked with
# asterisks and must be replaced with a real account and password.
zhihu_login("187********", "******")
# Optional helper, disabled: save the home page to index_page.html.
# get_index()
# Check the login state; note that the boolean result is discarded here.
is_login()
# Optional helper, disabled: download and prompt for a captcha on its own.
# get_captcha()
代码分析
访问知乎登陆界面
https://www.zhihu.com/#signin
按 F12 调试工具
点击: 登陆 – 使用密码登陆
在 Network 选项卡中勾选 Preserve log(否则点击登录后页面会跳转,已记录的请求数据会被清空)
点击登录之后,查看 XHR 请求,可以看到 phone_num 这个请求提交的参数中带有登录信息
有四个参数: _xsrf 、password、 captcha_type、phone_num
第一个参数 _xsrf:查看知乎登录页面的源代码,搜索这个关键词就能找到它的值。
验证码
知乎登录有中文验证码和英文验证码两种请求,上述代码使用的是英文验证码请求。