荐 python入门学习笔记-day17(6.21)之爬虫详解
程序员文章站
2022-11-01 21:11:25
python如何访问互联网:URL + lib ----> urllib。URL的一般格式为(带方括号[]的为可选项): protocol://hostname[:port]/path/[;parameters][?query]#fragment 。URL由三部分组成:第一部分是协议:http,https,ftp,file,ed2k…;第二部分是存放资源的服务器的域名或IP地址(有时候要包含端口号,各种传输协议都有默认的端口号,如http的默认端口为80);第三部分是资源的...
python如何访问互联网
URL + lib ----> urllib
URL的一般格式为(带方括号[]的为可选项):
protocol://hostname[:port]/path/[;parameters][?query]#fragment
URL由三部分组成:
第一部分是协议:http,https,ftp,file,ed2k…
第二部分是存放资源的服务器的域名或IP地址(有时候要包含端口号,各种传输协议都有默认的端口号,如http的默认端口为80)
第三部分是资源的具体地址:如目录或文件名等
访问网站并保存图片
import urllib.request
# Download the kitten picture. urlopen() returns a response object that can be
# used as a context manager, so the connection is closed once the body is read.
with urllib.request.urlopen('http://placekitten.com/g/500/600') as response:
    cat_img = response.read()

# The body is raw image bytes, so the file is opened in binary mode ('wb').
with open('cat_500_600.jpg', 'wb') as f:
    f.write(cat_img)
版本二
import urllib.request
# Variant 2: wrap the URL in a Request object first and hand that to
# urlopen() instead of the bare URL string.
req = urllib.request.Request('http://placekitten.com/g/500/600')
response = urllib.request.urlopen(req)
cat_img = response.read()

# Write the image bytes in binary mode; the write must sit inside the
# with-block so the file is still open when it runs.
with open('cat_500_600.jpg', 'wb') as f:
    f.write(cat_img)
>>> response.geturl() #得到网址
'http://placekitten.com/g/500/600'
>>> response.info()
<http.client.HTTPMessage object at 0x000001E7DF5930D0>
>>> print(response.info())
Date: Sat, 20 Jun 2020 13:10:14 GMT
Content-Type: image/jpeg
Transfer-Encoding: chunked
Connection: close
Set-Cookie: __cfduid=d19f60a1226135d28a59d7e4ed53889281592658614; expires=Mon, 20-Jul-20 13:10:14 GMT; path=/; domain=.placekitten.com; HttpOnly; SameSite=Lax
Cache-Control: public, max-age=86400
Expires: Thu, 31 Dec 2020 20:00:00 GMT
Vary: User-Agent, Accept-Encoding
Access-Control-Allow-Origin: *
CF-Cache-Status: HIT
Age: 33658
cf-request-id: 037372f2000000197ad4357200000001
Server: cloudflare
CF-RAY: 5a65ba966b58197a-HKG
>>> response.getcode()
200
访问有道并翻译
import urllib.request
import urllib.parse
# Endpoint that receives the translation form.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields posted to the endpoint; 'i' carries the text to translate.
data = {
    'i': 'I love you',
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15926650709527',
    'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
    'ts': '1592665070952',
    'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_CLICKBUTTION',
}

# urlopen() wants the request body as bytes: url-encode the form, then
# encode the resulting string as UTF-8.
data = urllib.parse.urlencode(data).encode('utf-8')

# Passing a data argument makes urlopen() send a POST request.
response = urllib.request.urlopen(url, data)
html = response.read().decode('utf-8')
print(html)
================= RESTART: C:\Users\zdb\Desktop\translation.py =================
{"type":"EN2ZH_CN","errorCode":0,"elapsedTime":2,"translateResult":[[{"src":"I love you","tgt":"我爱你"}]]}
>>> import json
>>> json.loads(html)
{'type': 'EN2ZH_CN', 'errorCode': 0, 'elapsedTime': 2, 'translateResult': [[{'src': 'I love you', 'tgt': '我爱你'}]]}
>>> target = json.loads(html)
>>> type(target)
<class 'dict'>
>>> target['translateResult']
[[{'src': 'I love you', 'tgt': '我爱你'}]]
>>> target['translateResult'][0][0]
{'src': 'I love you', 'tgt': '我爱你'}
>>> target['translateResult'][0][0]['tgt']
'我爱你'
版本二
import urllib.request
import urllib.parse
import json
# Ask the user for the text to translate.
content = input('请输入需要翻译的内容:')

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Same form as before, except that 'i' now holds the user's input.
data = {
    'i': content,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15926650709527',
    'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
    'ts': '1592665070952',
    'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_CLICKBUTTION',
}
data = urllib.parse.urlencode(data).encode('utf-8')

response = urllib.request.urlopen(url, data)
html = response.read().decode('utf-8')

# The reply is JSON; the translated text sits at translateResult[0][0]['tgt'].
target = json.loads(html)
translation = target['translateResult'][0][0]['tgt']
print('翻译结果: %s' % translation)
import urllib.request
import urllib.parse
import json
content = input('请输入需要翻译的内容:')

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# New in this variant: a headers dict carrying a browser User-Agent string,
# passed to Request() as its third argument.
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
}

data = {
    'i': content,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15926650709527',
    'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
    'ts': '1592665070952',
    'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_CLICKBUTTION',
}
data = urllib.parse.urlencode(data).encode('utf-8')

# Bundle URL, POST body and headers into one Request object.
req = urllib.request.Request(url, data, head)
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')

target = json.loads(html)
translation = target['translateResult'][0][0]['tgt']
print('翻译结果: %s' % translation)
================= RESTART: C:\Users\zdb\Desktop\translation.py =================
请输入需要翻译的内容:love
翻译结果: 爱
>>> req.headers
{'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
import urllib.request
import urllib.parse
import json
content = input('请输入需要翻译的内容:')

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Disabled alternative: build a headers dict up front and pass it to
# Request() instead of calling add_header() afterwards.
# head = {}
# head['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'

data = {
    'i': content,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15926650709527',
    'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
    'ts': '1592665070952',
    'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_CLICKBUTTION',
}
data = urllib.parse.urlencode(data).encode('utf-8')

# Create the Request first, then attach the User-Agent header to it.
req = urllib.request.Request(url, data)
req.add_header(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
)

response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')

target = json.loads(html)
translation = target['translateResult'][0][0]['tgt']
print('翻译结果: %s' % translation)
================= RESTART: C:\Users\zdb\Desktop\translation.py =================
请输入需要翻译的内容:爱国
翻译结果: patriotic
>>> req.headers
{'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
import urllib.request
import urllib.parse
import json
import time
# The endpoint and the User-Agent string never change between iterations,
# so they are defined once, outside the loop.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'

# Disabled alternative: pass a headers dict to Request() instead of calling
# add_header() on the request.
# head = {}
# head['User-Agent'] = user_agent

# Keep translating until the user types 'q!'.
while True:
    content = input('请输入需要翻译的内容(输入‘q!’退出程序:')
    if content == 'q!':
        break

    # Form fields posted to the endpoint; 'i' carries the text to translate.
    data = {
        'i': content,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '15926650709527',
        'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
        'ts': '1592665070952',
        'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_CLICKBUTTION',
    }
    data = urllib.parse.urlencode(data).encode('utf-8')

    req = urllib.request.Request(url, data)
    req.add_header('User-Agent', user_agent)

    # The with-block closes the connection even if reading or decoding fails.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')

    # The reply is JSON; the translated text sits at translateResult[0][0]['tgt'].
    target = json.loads(html)
    print('翻译结果: %s' % (target['translateResult'][0][0]['tgt']))

    # Wait 5 seconds before the next prompt (presumably to avoid sending
    # requests too quickly).
    time.sleep(5)
================= RESTART: C:\Users\zdb\Desktop\translation.py =================
请输入需要翻译的内容(输入‘q!’退出程序:爱
翻译结果: love
请输入需要翻译的内容(输入‘q!’退出程序:爱过
翻译结果: To have loved
请输入需要翻译的内容(输入‘q!’退出程序:
import urllib.request
# Page fetched through the proxy.
url = 'http://whatismyip.com.tw'

# Step 1: map the URL scheme to the proxy's 'host:port' address.
proxies = {'http': '213.226.11.149:41878'}
proxy_support = urllib.request.ProxyHandler(proxies)

# Step 2: build an opener around the proxy handler.
opener = urllib.request.build_opener(proxy_support)

# Step 3: install the opener so that urllib.request.urlopen() uses it.
urllib.request.install_opener(opener)

response = urllib.request.urlopen(url)
html = response.read().decode('utf-8')
print(html)
import urllib.request
import time
import socket
import urllib.error
# Set the socket-level default timeout to 20 seconds.
socket.setdefaulttimeout(20)

url = 'http://whatismyip.com.tw'

# Route 'http' requests through the given proxy and install the opener so
# that urllib.request.urlopen() uses it.
proxy_support = urllib.request.ProxyHandler({'http': '213.226.11.149:41878'})
opener = urllib.request.build_opener(proxy_support)
urllib.request.install_opener(opener)

# The with-block closes the response even when read() or decode() raises,
# which a trailing response.close() call would not guarantee.
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')

print(html)
time.sleep(1)  # pause length is arbitrary; adjust as needed
import urllib.request
#import random
#import time
#import socket
#import urllib.error
#socket.setdefaulttimeout(20) # 设置socket层的超时时间为20秒
url = 'http://whatismyip.com.tw'

# Disabled alternative: pick the proxy at random from a list of candidates.
# iplist = ['171.35.166.80:9999', '113.195.19.107:9999', '171.35.162.62:9999']
# proxy_support = urllib.request.ProxyHandler({'http': random.choice(iplist)})
proxies = {'http': '113.121.76.0:9999'}
proxy_support = urllib.request.ProxyHandler(proxies)

opener = urllib.request.build_opener(proxy_support)
# Replace the opener's default header list with a browser User-Agent.
opener.addheaders = [
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'),
]
urllib.request.install_opener(opener)

response = urllib.request.urlopen(url)
html = response.read().decode('utf-8')
# response.close()  # remember to close the response
print(html)
# time.sleep(5)  # pause length is arbitrary
本文地址:https://blog.csdn.net/qq_39236499/article/details/106884266