python爬取谷歌翻译 爬虫实践
程序员文章站
2022-05-04 12:05:06
...
python爬取谷歌翻译 爬虫实践
首先先安装第三方库:
pip3 install PyExecJS -i https://pypi.tuna.tsinghua.edu.cn/simple
macos系统运行代码报错的话在终端运行如下代码:
/Applications/Python\ 3.7/Install\ Certificates.command ; exit;
英译中代码:
import urllib.request
import execjs
# 谷歌翻译
class Py4Js():
def __init__(self):
self.ctx = execjs.compile("""
function TL(a) {
var k = "";
var b = 406644;
var b1 = 3293161072;
var jd = ".";
var $b = "+-a^+6";
var Zb = "+-3^+b+-f";
for (var e = [], f = 0, g = 0; g < a.length; g++) {
var m = a.charCodeAt(g);
128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
e[f++] = m >> 18 | 240,
e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
e[f++] = m >> 6 & 63 | 128),
e[f++] = m & 63 | 128)
}
a = b;
for (f = 0; f < e.length; f++) a += e[f],
a = RL(a, $b);
a = RL(a, Zb);
a ^= b1 || 0;
0 > a && (a = (a & 2147483647) + 2147483648);
a %= 1E6;
return a.toString() + jd + (a ^ b)
};
function RL(a, b) {
var t = "a";
var Yb = "+";
for (var c = 0; c < b.length - 2; c += 3) {
var d = b.charAt(c + 2),
d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
}
return a
}
""")
def getTk(self, text):
"""获取tk值"""
return self.ctx.call("TL", text)
# 获取要翻译的文字
key = input("请输入要翻译的英文文字(英译中):")
# 获取Py4Js实例
py = Py4Js()
print(py.getTk(key)) # 打印tk值
# 请求的URL
url = "https://translate.google.cn/translate_a/single?client=webapp&sl=en&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&otf=1&ssel=0&tsel=0&kc=7&tk=" + py.getTk(
key) + "&q=" + key
# 请求头
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
}
# 获取request对象
req = urllib.request.Request(url, headers=header)
# 响应的数据是通过Ajax返回的JSON格式数据,注意编码问题
resp = urllib.request.urlopen(req).read().decode("utf-8")
# 打印
print(resp)
中译英代码:
import urllib.parse
import urllib.request
import execjs
# 谷歌翻译
class Py4Js():
def __init__(self):
self.ctx = execjs.compile("""
function TL(a) {
var k = "";
var b = 406644;
var b1 = 3293161072;
var jd = ".";
var $b = "+-a^+6";
var Zb = "+-3^+b+-f";
for (var e = [], f = 0, g = 0; g < a.length; g++) {
var m = a.charCodeAt(g);
128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
e[f++] = m >> 18 | 240,
e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
e[f++] = m >> 6 & 63 | 128),
e[f++] = m & 63 | 128)
}
a = b;
for (f = 0; f < e.length; f++) a += e[f],
a = RL(a, $b);
a = RL(a, Zb);
a ^= b1 || 0;
0 > a && (a = (a & 2147483647) + 2147483648);
a %= 1E6;
return a.toString() + jd + (a ^ b)
};
function RL(a, b) {
var t = "a";
var Yb = "+";
for (var c = 0; c < b.length - 2; c += 3) {
var d = b.charAt(c + 2),
d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
}
return a
}
""")
def getTk(self, text):
"""获取tk值"""
return self.ctx.call("TL", text)
# 获取要翻译的文字
key = input("请输入要翻译的中文文字(中译英):")
# 获取Py4Js实例
py = Py4Js()
print(py.getTk(key)) # 打印tk值
# 对中文进行处理
data = urllib.parse.urlencode({"q": key})
# 请求的URL
url = "https://translate.google.cn/translate_a/single?client=webapp&sl=zh-CN&tl=en&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ssel=6&tsel=3&kc=1&tk=" + py.getTk(
key) + "&" + data
print(url) # 打印URL
# 请求头
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
}
# 获取request对象
req = urllib.request.Request(url, headers=header)
# 响应的数据是通过Ajax返回的JSON格式数据,注意编码问题
resp = urllib.request.urlopen(req).read().decode("utf-8")
# 打印
print(resp)