欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

Python 爬虫小练习--爬取有道词典

程序员文章站 2022-05-04 11:45:26
...
#!/usr/local/python3/bin/python3
 
import urllib.request
import urllib.parse
import json
import time

# The newer Youdao web endpoint enables MD5 authentication on requests;
# removing the "_o" from the original URL avoids that authentication.
TRANSLATE_URL = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36')
# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the prompt forever.
REQUEST_TIMEOUT = 10
# Pause between requests, in seconds (presumably to avoid hammering the
# service -- the original script slept for the same duration).
REQUEST_INTERVAL = 2


def build_payload(content):
    """Return the URL-encoded, UTF-8 encoded POST body for *content*.

    The form fields other than ``i`` are fixed values.
    NOTE(review): ``salt``/``sign``/``ts``/``bv`` look like values captured
    from one browser session; presumably they are not validated by the
    unauthenticated endpoint -- confirm.
    """
    fields = {
        'i': content,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '15604981263730',
        'sign': 'a4ddf499a7ec36826abc283abcc828d8',
        'ts': '1560498126373',
        'bv': '44eca3fe61b233886c1a0e332df26d16',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_CLICKBUTTION',
        'typoResult': 'false',
    }
    # urlencode turns the mapping into a query-string style form body.
    return urllib.parse.urlencode(fields).encode('utf-8')


def extract_translation(target):
    """Return the translated text contained in a decoded JSON response.

    ``target['translateResult']`` is read as a list of lists of dicts that
    each carry a ``'tgt'`` string.  Every segment is used (not just
    ``[0][0]``) so that longer input is not truncated: segments of one inner
    list are concatenated as returned, inner lists are separated by newlines.

    Raises:
        ValueError: if the response does not have that shape or is empty.
    """
    try:
        paragraphs = [
            ''.join(segment['tgt'] for segment in paragraph)
            for paragraph in target['translateResult']
        ]
    except (KeyError, TypeError) as err:
        raise ValueError('unexpected response format: %r' % (target,)) from err
    if not paragraphs:
        raise ValueError('unexpected response format: %r' % (target,))
    return '\n'.join(paragraphs)


def translate(content):
    """POST *content* to the translate endpoint and return the translation.

    Raises:
        OSError: on network failures (``urllib.error.URLError`` and socket
            timeouts are ``OSError`` subclasses).
        ValueError: if the body is not valid UTF-8/JSON or lacks the
            expected fields.
    """
    req = urllib.request.Request(TRANSLATE_URL, build_payload(content))
    req.add_header('User-Agent', USER_AGENT)
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as response:
        html = response.read().decode('utf-8')
    return extract_translation(json.loads(html))


def main():
    """Prompt for text in a loop and print its translation until "q!"."""
    while True:
        try:
            content = input('请输入需要翻译的内容(按"q!"退出): ')
        except EOFError:
            # stdin was closed (Ctrl-D / end of piped input): exit quietly.
            break
        if content == "q!":
            break
        if not content.strip():
            # Nothing to translate; ask again without contacting the server.
            continue

        try:
            result = translate(content)
        except (OSError, ValueError) as err:
            # Report the failure and keep the prompt alive instead of crashing.
            print('翻译失败: %s' % err)
        else:
            print('翻译结果: %s' % result)
        time.sleep(REQUEST_INTERVAL)


# Run the interactive loop only when executed as a script, so importing this
# module has no side effects.
if __name__ == '__main__':
    main()