欢迎您访问程序员文章站,本站旨在为大家分享程序员计算机编程知识!
您现在的位置是: 首页

Requests 和 Scrapy 添加动态IP代理

程序员文章站 2022-05-07 10:54:25
...

Requests

import requests

# Target page to fetch through the proxy tunnel.
targetUrl = "http://test.abuyun.com/proxy.php"
#targetUrl = "http://proxy.abuyun.com/switch-ip"
#targetUrl = "http://proxy.abuyun.com/current-ip"

# Proxy server host and port.
proxyHost = "proxy.abuyun.com"
proxyPort = "9000"

# Proxy tunnel credentials.
proxyUser = "H225506235A2NG0p"
proxyPass = "123456"

# Authenticated proxy URL in the form http://user:pass@host:port.
# NOTE(review): the published snippet had the literal "s@" replaced by an
# email-obfuscation artifact ("[email protected]"); restored here.
proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    "host" : proxyHost,
    "port" : proxyPort,
    "user" : proxyUser,
    "pass" : proxyPass,
}

# Route both plain and TLS traffic through the same tunnel.
proxies = {
    "http"  : proxyMeta,
    "https" : proxyMeta,
}

# Keep the Response object; the original assigned `.text` (a str) and then
# called `.text` on it again, which raises AttributeError.
res = requests.get(targetUrl, proxies=proxies)

print(res.text)
Scrapy
import base64
from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware

# 代理服务器
# Proxy tunnel endpoint (Abuyun HTTP tunnel server).
proxyServer = "http://proxy.abuyun.com:9010"

# Tunnel credentials used for Basic authentication.
proxyUser = "H225506235A2NG0p"
proxyPass = "123456"

# Precompute the Proxy-Authorization header value:
# "Basic " + base64("user:password").
_credentials = (proxyUser + ":" + proxyPass).encode("utf-8")
proxyAuth = "Basic " + base64.b64encode(_credentials).decode("utf-8")

class ProxyMiddleware(HttpProxyMiddleware):
    """Downloader middleware that routes every request through the Abuyun
    proxy tunnel and attaches Basic proxy authentication.

    Deliberately does not call ``super().__init__()``: the parent would
    scan environment variables for proxies, which this middleware replaces
    with the fixed tunnel endpoint above.
    """

    def __init__(self, auth_encoding='latin-1'):
        # Encoding the parent class uses when building auth headers.
        self.auth_encoding = auth_encoding

        # Per-instance dict — the original mutated a shared class-level
        # dict. Credentials are joined with ":" (user:password form),
        # consistent with proxyAuth; the original concatenated them with
        # no separator.
        self.proxies = {proxyServer: proxyUser + ":" + proxyPass}

    def process_request(self, request, spider):
        # Send this request through the tunnel endpoint ...
        request.meta["proxy"] = proxyServer

        # ... authenticating with the precomputed Basic header value.
        request.headers["Proxy-Authorization"] = proxyAuth