python爬虫--模拟12306登录
程序员文章站
2023-12-29 08:49:40
模拟12306登录 超级鹰: 爬虫程序: ......
模拟12306登录
超级鹰(验证码识别平台的客户端代码,需保存为 cjy.py,供下面的爬虫程序导入):
#!/usr/bin/env python
# coding:utf-8
"""Small HTTP client for the Chaojiying captcha-recognition service."""
import requests
from hashlib import md5


class chaojiying_client(object):
    """Holds the account parameters and posts captcha images for recognition."""

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the account parameters sent with every request.

        username: account user name (sent as 'user').
        password: plain-text password as ``str``; only its MD5 hex digest is
            kept on the instance and sent (as 'pass2').
        soft_id: software ID (sent as 'softid').
        timeout: seconds handed to ``requests`` for each HTTP call, so a
            stalled server cannot block the caller forever.
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'connection': 'keep-alive',
            'user-agent': 'mozilla/4.0 (compatible; msie 8.0; windows nt 5.1; trident/4.0)',
        }

    def postpic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/upload/processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def reporterror(self, im_id):
        """Report a wrongly recognised captcha and return the decoded JSON response.

        im_id: image ID of the captcha being reported.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/upload/reporterror.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()


def codecjy(image_path='./code.png', codetype=9004):
    """Recognise the captcha stored at *image_path* and return its 'pic_str' value.

    image_path: local path of the captcha image (defaults to './code.png').
    codetype: captcha type code from the service's price list (defaults to 9004).
    """
    # Arguments are user name, password and software ID; the software ID is
    # generated under "User Center >> Software ID" on the service's site.
    # NOTE(review): the credentials are hard-coded in source; consider loading
    # them from the environment or a config file instead.
    chaojiying = chaojiying_client('tjtj', 'tangjian219', '902590')
    # The context manager closes the file handle even if reading fails.
    with open(image_path, 'rb') as image_file:
        im = image_file.read()
    return chaojiying.postpic(im, codetype)['pic_str']
爬虫程序:
from time import sleep

from PIL import Image  # provided by the Pillow package
from selenium import webdriver
from selenium.webdriver import ActionChains

from cjy import chaojiying_client


def transformcode(imgpath, imgtype):
    """Send the image at *imgpath* to the captcha service and return its 'pic_str'.

    imgpath: path of the captcha image file.
    imgtype: captcha type code passed through to ``postpic``.
    """
    chaojiying = chaojiying_client('超级鹰用户名', '超级鹰密码', '899370')
    # The context manager closes the file handle even if reading fails.
    with open(imgpath, 'rb') as img_file:
        im = img_file.read()
    return chaojiying.postpic(im, imgtype)['pic_str']


def _parse_points(result):
    """Turn 'x1,y1|x2,y2|...' (or a single 'x,y') into [[x1, y1], [x2, y2], ...]."""
    points = []
    # split('|') yields a one-element list when there is no '|', so the
    # single-point and multi-point answers share the same path.
    for pair in result.split('|'):
        fields = pair.split(',')
        points.append([int(fields[0]), int(fields[1])])
    return points


bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('https://kyfw.12306.cn/otn/login/init')
    sleep(2)

    # Save a screenshot of the current browser page.
    bro.save_screenshot('./main.png')

    # Locate the captcha image so that its region can be cut out of the screenshot.
    img_tag = bro.find_element_by_xpath('//*[@id="loginform"]/div/ul[2]/li[4]/div/div/div[3]/img')
    location = img_tag.location  # used below as the upper-left corner of the crop box
    size = img_tag.size  # width and height of the element

    # Crop box in (left, upper, right, lower) order.
    rangle = (
        int(location['x']),
        int(location['y']),
        int(location['x'] + size['width']),
        int(location['y'] + size['height']),
    )

    # Cut the captcha region out of the screenshot; the context manager
    # releases the screenshot file once the crop has been written.
    with Image.open('./main.png') as screenshot:
        frame = screenshot.crop(rangle)
        frame.save('code.png')

    # Ask the recognition service where to click.
    result = transformcode('./code.png', 9004)
    print(result)  # x1,y1|x2,y2|x3,y3

    # Click coordinates inside the captcha image: [[x1, y1], [x2, y2], ...]
    all_list = _parse_points(result)

    # NOTE(review): the offsets are assumed to be measured from the image's
    # top-left corner; newer Selenium releases measure from the element's
    # centre -- confirm against the installed version.
    for x, y in all_list:
        ActionChains(bro).move_to_element_with_offset(img_tag, x, y).click().perform()
        sleep(1)

    bro.find_element_by_id('username').send_keys('xxxxxx')  # 12306 user name
    sleep(1)
    bro.find_element_by_id('password').send_keys('xxxx')  # password
    sleep(1)
    bro.find_element_by_id('loginsub').click()  # submit the login form
    sleep(10)
    print(bro.page_source)
finally:
    # Always shut the browser down, even when a step above raises.
    bro.quit()