生成随机的ip代理池
程序员文章站
2022-05-19 13:21:54
...
rand_ip.py
# https://www.xicidaili.com/wt/1 (西刺代理)
# 大象代理
import requests
from lxml import etree
from random import choice
# 获取有效ip并随机选择一个ip
# 每次用时执行这个脚本就可以获取最新ip代理池并随机选择一个ip
class Get_Ip(object):
    """Scrape free proxies from a xicidaili listing and pick one at random.

    ``run`` rebuilds the on-disk pool (``rand_ip.txt``) from the listing and
    returns one randomly chosen ``scheme://address:port`` entry.  All methods
    are classmethods; the class holds no per-instance state.
    """

    # Listing scraped by ``run``; a page number is appended to this URL.
    LIST_URL = "https://www.xicidaili.com/nn/"
    # Text file holding the scraped pool, one proxy URL per line.
    POOL_FILE = "rand_ip.txt"

    @classmethod
    def run(cls):
        """Refresh the proxy pool file and return one random proxy URL.

        Returns:
            A ``scheme://address:port`` string read back from ``POOL_FILE``.

        Raises:
            IndexError: if the refreshed pool contains no usable entry.
        """
        cls.get_allpage_ip(cls.LIST_URL)
        # A context manager closes the file even if reading fails.
        with open(cls.POOL_FILE, "r", encoding="utf-8") as pool_file:
            lines = pool_file.readlines()
        return cls.rand_ip(lines)

    @classmethod
    def get(cls, url, is_text=True, timeout=10):
        """Download ``url`` with a browser-like User-Agent.

        Args:
            url: Address to fetch.
            is_text: When true return the decoded text, otherwise the raw
                bytes of the response body.
            timeout: Seconds to wait for the server.  Without a timeout
                ``requests`` may block indefinitely on an unresponsive host.

        Returns:
            ``str`` if ``is_text`` is true, else ``bytes``.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
        }
        # NOTE(review): certificate verification is disabled, as in the
        # original code -- confirm this is still required for the target site.
        response = requests.get(
            url, headers=headers, verify=False, timeout=timeout
        )
        # Let requests guess the charset from the body before decoding.
        response.encoding = response.apparent_encoding
        return response.text if is_text else response.content

    @classmethod
    def get_allpage_ip(cls, url, max_pages=1):
        """Scrape listing pages and overwrite ``POOL_FILE`` with the result.

        Args:
            url: Listing base URL; page numbers are appended to it.
            max_pages: Upper bound on the number of pages to scrape.  The
                default of 1 keeps the original behaviour of reading only the
                first page (the original author's note says only the current
                day's proxies are wanted).  Values below 1 are treated as 1.

        The pool file is written only after every page has been fetched, so a
        failed download does not leave a truncated file behind.
        """
        page_count = 1
        if max_pages > 1:
            # Only look up the real page total when more than one page is
            # requested; this avoids a needless extra request otherwise.
            page_count = min(max_pages, cls._count_pages(url))

        proxies = []
        for page in range(1, page_count + 1):
            proxies.extend(cls.get_ip(url + str(page)))

        with open(cls.POOL_FILE, "w", encoding="utf-8") as pool_file:
            pool_file.writelines(proxy + "\n" for proxy in proxies)

    @classmethod
    def _count_pages(cls, url):
        """Return the highest page number linked from the listing at ``url``.

        Replaces the former hard-coded lookup of the ``/nn/3799`` link, which
        raised ``IndexError`` as soon as the site's page total changed.
        Falls back to 1 when no numeric pagination link is found.
        """
        from urllib.parse import urlparse  # local import: only needed here

        # Assumes pagination anchors look like <a href="/nn/<page>">page</a>,
        # as the original XPath implied -- TODO confirm against the live page.
        prefix = urlparse(url).path
        root = etree.HTML(cls.get(url))
        labels = root.xpath(
            "//a[starts-with(@href, $prefix)]/text()", prefix=prefix
        )
        stripped = (label.strip() for label in labels)
        page_numbers = [int(label) for label in stripped if label.isdecimal()]
        return max(page_numbers) if page_numbers else 1

    @classmethod
    def get_ip(cls, url):
        """Parse one listing page into a list of proxy URLs.

        Only rows whose 9th cell contains the character '天' ("day") are
        kept.  Rows missing the address, port or type cell are skipped
        instead of aborting the whole scrape with ``IndexError``.

        Args:
            url: Address of a single listing page.

        Returns:
            A list of ``scheme://address:port`` strings (possibly empty).
        """
        root = etree.HTML(cls.get(url))
        proxies = []
        # The first <tr> of the table is skipped (presumably the header row).
        for row in root.xpath("//table[@id='ip_list']/tr")[1:]:
            lifetime = row.xpath("td[9]/text()")
            if not lifetime or '天' not in lifetime[0]:
                continue
            address = row.xpath("td[2]/text()")
            port = row.xpath("td[3]/text()")
            scheme = row.xpath("td[6]/text()")
            if not (address and port and scheme):
                continue
            proxies.append("{}://{}:{}".format(
                scheme[0].strip().lower(),
                address[0].strip(),
                port[0].strip(),
            ))
        return proxies

    @classmethod
    def rand_ip(cls, content):
        """Return one random non-blank entry from ``content``, stripped.

        Args:
            content: Iterable of lines, e.g. the result of ``readlines()``.

        Raises:
            IndexError: if ``content`` has no non-blank line.
        """
        # Drop blank lines so an empty string is never handed out as a proxy.
        candidates = [line.strip() for line in content if line.strip()]
        if not candidates:
            raise IndexError("proxy pool is empty: no entry to choose from")
        return choice(candidates)
# Script entry: rebuild the proxy pool, then show the randomly chosen proxy.
ip = Get_Ip.run()
print(ip)