欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

Python 爬虫基础——爬取有道翻译

程序员文章站 2022-05-04 11:50:14
...

言止于此,自我领悟

import hashlib
import json
import random
import time
import requests

# URL that the translation form is POSTed to.
base_url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

# Headers sent with the POST so that it looks like an XMLHttpRequest issued
# by a desktop Chrome browser from the fanyi.youdao.com page.
#
# 'Content-Length' is deliberately absent: a fixed value is only right for
# one particular body, and Requests fills in the real length itself when it
# encodes the form data.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    # NOTE(review): the leading '[email protected]' looks like an e-mail
    # obfuscation artifact from the page this listing was copied from, and the
    # remaining values are tied to one old session -- replace the whole cookie
    # with one captured from a current browser session.
    'Cookie': '[email protected]; P_INFO=django_ajax; JSESSIONID=aaazFWlbFlhN-fcKP7x8w; OUTFOX_SEARCH_USER_ID_NCOO=712821957.6986521; ___rl__test__cookies=1576654708361',
    'Referer': 'http://fanyi.youdao.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}
def get_md5(value):
    """Return the MD5 digest of *value* as a lowercase hexadecimal string.

    *value* is a text string; it is encoded as UTF-8 before hashing.
    """
    payload = bytes(value, 'utf-8')
    return hashlib.md5(payload).hexdigest()
# Word to translate.
kw = 'python'

# Read the clock once so 'ts' and 'salt' describe the same instant, and
# truncate to whole milliseconds: str() of the raw float product would leak
# fractional digits into both form fields.
ts = str(int(time.time() * 1000))
# NOTE(review): salt is ts with one random decimal digit appended, which is
# what the site's own JavaScript is believed to send -- confirm against the
# live page before relying on it.
salt = ts + str(random.randint(0, 9))

# The signature is the MD5 of client id + query + salt + a fixed key.
# NOTE(review): the '[email protected]' fragment in the key looks like an
# e-mail obfuscation artifact from the page this listing was copied from, so
# the resulting sign is unlikely to validate. The original key presumably
# ended in 'N5@Tj' -- verify against the site's JavaScript and restore it.
value = 'fanyideskweb' + kw + salt + 'n%A-rKaT5fb[Gy?;[email protected]'
sign = get_md5(value)

# Form fields posted to the endpoint.
data = {
    'i': kw,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': salt,
    'sign': sign,
    'ts': ts,
    'bv': '6945a57e1923a3517303cdcdb2d3d15e',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    # NOTE(review): the lowercase 'l' in 'REALTlME' is kept exactly as
    # published; do not "correct" it without checking what the server expects.
    'action': 'FY_BY_REALTlME',
}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors before JSON parsing is tried.
response = requests.post(base_url, headers=headers, data=data, timeout=10)
response.raise_for_status()
json_data = json.loads(response.text)

# Concatenate the entries under smartResult. The loop variable is no longer
# called 'data', so the form dict above is not overwritten.
try:
    entries = json_data['smartResult']['entries']
except (KeyError, TypeError) as err:
    # Show what the server actually sent instead of a bare KeyError.
    raise RuntimeError(
        "response has no smartResult.entries: " + response.text
    ) from err
result = ''.join(entries)
print(result)
相关标签: 爬虫 python