欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

python爬虫--模拟12306登录

程序员文章站 2023-12-29 08:49:40
模拟12306登录 超级鹰: 爬虫程序: ......

模拟12306登录

超级鹰:

#!/usr/bin/env python
# coding:utf-8

import requests
from hashlib import md5

class chaojiying_client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    The constructor precomputes the form fields (`user`, `pass2`, `softid`)
    and the request headers that every call sends along.
    """

    def __init__(self, username, password, soft_id, timeout=30):
        """Store the account details used to authenticate each request.

        Args:
            username: Account name, sent as the `user` form field.
            password: Plain-text password as `str` (or already-encoded
                `bytes`). Only its MD5 hex digest is kept and sent, as the
                `pass2` form field.
            soft_id: Software id, sent as the `softid` form field.
            timeout: Seconds to wait for the server on each request before
                `requests` raises a timeout error. Pass `None` to wait
                indefinitely (the behaviour before this argument existed).
        """
        self.username = username
        # Accept bytes as well as str so callers that already encoded the
        # password do not crash on `.encode`.
        if isinstance(password, str):
            password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'connection': 'keep-alive',
            'user-agent': 'mozilla/4.0 (compatible; msie 8.0; windows nt 5.1; trident/4.0)',
        }

    def postpic(self, im, codetype):
        """Upload a captcha image for recognition.

        Args:
            im: Raw bytes of the image.
            codetype: Captcha type code; see
                http://www.chaojiying.com/price.html for the list.

        Returns:
            The decoded JSON body of the response.
        """
        # base_params is merged last so it wins on any key clash, exactly
        # like the dict.update() call it replaces.
        params = {'codetype': codetype, **self.base_params}
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/upload/processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,  # without this a stalled server hangs forever
        )
        return r.json()

    def reporterror(self, im_id):
        """Report that a previously submitted captcha was answered wrongly.

        Args:
            im_id: Id of the image whose answer is being reported.

        Returns:
            The decoded JSON body of the response.
        """
        params = {'id': im_id, **self.base_params}
        r = requests.post(
            'http://upload.chaojiying.net/upload/reporterror.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()


def codecjy(img_path='./code.png', codetype=9004):
    """Recognise a captcha image stored on disk and return the answer string.

    Args:
        img_path: Path of the image file to upload. Defaults to the
            `./code.png` path this helper has always read.
        codetype: Captcha type code passed to `postpic`. Defaults to 9004.

    Returns:
        The `pic_str` field of the service's JSON response.
    """
    # Arguments are (username, password, software id); the software id is
    # generated in the service's user centre.
    # NOTE(review): credentials are hard-coded here — consider loading them
    # from configuration or the environment instead.
    chaojiying = chaojiying_client('tjtj', 'tangjian219', '902590')
    # Use a context manager so the file handle is closed even if read() fails.
    with open(img_path, 'rb') as img_file:
        im = img_file.read()
    return chaojiying.postpic(im, codetype)['pic_str']

爬虫程序:

from time import sleep

from PIL import Image  # provided by the Pillow package
from selenium import webdriver
from selenium.webdriver import ActionChains

from cjy import chaojiying_client

#封装一个识别验证码的函数
def transformcode(imgpath,imgtype):
    """Send the image at `imgpath` to the captcha service and return its answer.

    Args:
        imgpath: Path of the captcha image file to upload.
        imgtype: Captcha type code forwarded to `postpic`.

    Returns:
        The `pic_str` field of the service's JSON response.
    """
    chaojiying = chaojiying_client('超级鹰用户名', '超级鹰密码', '899370')
    # Context manager guarantees the file handle is released after reading.
    with open(imgpath, 'rb') as img_file:
        im = img_file.read()
    return chaojiying.postpic(im, imgtype)['pic_str']


# Drive a Chrome session through the 12306 login page: screenshot the page,
# crop out the click-captcha, have it recognised, click the returned points,
# then fill in the credentials and submit.
# NOTE(review): `executable_path` and `find_element_by_*` are the Selenium 3
# style API; confirm the installed Selenium version still provides them.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')

try:
    bro.get('https://kyfw.12306.cn/otn/login/init')
    sleep(2)

    # Save a screenshot of the current browser page; the captcha is cut out
    # of this image below.
    bro.save_screenshot('./main.png')

    # Locate the captcha <img> and read its position and dimensions.
    img_tag = bro.find_element_by_xpath('//*[@id="loginform"]/div/ul[2]/li[4]/div/div/div[3]/img')
    location = img_tag.location  # used below as the upper-left corner of the crop box
    size = img_tag.size  # element width/height

    # Crop box in PIL order: (left, upper, right, lower).
    rangle = (
        int(location['x']),
        int(location['y']),
        int(location['x'] + size['width']),
        int(location['y'] + size['height']),
    )

    # Cut the captcha region out of the screenshot and store it on disk.
    with Image.open('./main.png') as screenshot:
        frame = screenshot.crop(rangle)
        frame.save('code.png')

    # Ask the recognition service which points must be clicked.
    result = transformcode('./code.png', 9004)
    print(result)  # x1,y1|x2,y2|x3,y3

    if not result:
        # Fail with a clear message instead of a bare int('') ValueError.
        raise RuntimeError('captcha recognition returned no coordinates')

    # "x1,y1|x2,y2|x3,y3" ==> [[x1, y1], [x2, y2], [x3, y3]]
    # split('|') also covers the single-point case (no '|' in the string),
    # so no separate branch is needed.
    all_list = []
    for pair in result.split('|'):
        fields = pair.split(',')
        all_list.append([int(fields[0]), int(fields[1])])

    # Click each point; the offsets are passed relative to the captcha element.
    for x, y in all_list:
        ActionChains(bro).move_to_element_with_offset(img_tag, x, y).click().perform()
        sleep(1)

    bro.find_element_by_id('username').send_keys('xxxxxx')  # 12306 user name
    sleep(1)
    bro.find_element_by_id('password').send_keys('xxxx')  # password
    sleep(1)

    bro.find_element_by_id('loginsub').click()  # submit the login form

    sleep(10)
    print(bro.page_source)
finally:
    # Always shut the browser down, even when a step above raised.
    bro.quit()

上一篇:

下一篇: