欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

爬虫_12306登录验证码

程序员文章站 2022-05-08 10:56:57
...

代码仅限于技术交流

# -*- encoding: utf-8 -*-
# 12306登录
    # 第一步验证图片验证码是否正确
        # 1,验证点击是根据像素判断是否点击到正确的区域
        # 2,可以手动制作好每个区域的中间位置大概像素(x,y)
        # 3, 注意图片像素点y轴需要减去图片文字区域的大约30像素(x,y-30)
    # 第二步验证账号密码

import requests

from PIL import Image
import matplotlib.pyplot as plt

class Login(object):
    def __init__(self):
        # 实例化session,自动携带cookie
        self.session = requests.session()
        # headers
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'
        }
        # 获取登录页面
        self.login_url = 'https://kyfw.12306.cn/otn/login/init'
        # 验证码下载链接
        self.load_image_url = 'https://kyfw.12306.cn/passport/captcha/captcha-image?login_site=E&module=login'
        # 'https://kyfw.12306.cn/passport/captcha/captcha-image?login_site=E&module=login&rand=sjrand&0.8715477478180387'
        # 验证码图片链接
        self.captcha_check_url = 'https://kyfw.12306.cn/passport/captcha/captcha-check'
        # 密码登录
        self.user_login_url = 'https://kyfw.12306.cn/passport/web/login'

    def login(self):  # 登录界面
        # 暂时不需要登录页面信息
        pass

    def image_code_number(self,num):
        option = {
            '1': '40,40',
            '2': '110,40',
            '3': '180,40',
            '4': '260,40',
            '5': '40,110',
            '6': '100,110',
            '7': '180,40',
            '8': '260,40'
        }

        # 判断num长度
        # 点击验证码图像数量不一定,如果是1需要特殊处理
        check_num = []
        if len(num) == 1:
            check_num = option[num]
        else:
            image_num = num.split(',')  # num: '1,2'

            for i in image_num:
                check_num.append(option[i])
            check_num = ','.join(check_num)
        print(check_num)  # 40,40,110,40
        return check_num

    def load_verify_image(self):  # 下载图片验证图片
        '''
            answer: 120,40
            login_site: E
            rand: sjrand
        '''
        response = self.session.get(url=self.load_image_url,headers=self.headers)
        contents = response.content
        # 保存图片
        with open('check_image.jpg', 'wb') as f:
            f.write(contents)
        print('图片下载成功!')

    def show_image(self):
        # img = Image.open('check_image.jpg')
        # img.show()

        # img=Image.open('F:/heck_image.jpg')
        img = Image.open('check_image.jpg')
        # 设置多个figure,设置figure的标题
        plt.figure("check_image")
        plt.imshow(img)
        plt.show()

    def verify_image_code(self):
        num = input('请输入正确的验证码:')
        # 构建formdata
        data = {
            'answer': self.image_code_number(num),
            'login_site': 'E',
            'rand': 'sjrand'
        }
        response = self.session.post(self.captcha_check_url,data=data,headers=self.headers)

        print(response.text)

    def user_login(self):
        '''
            username: 133
            password: 12313123
            appid: otn
        '''
        data = {
            'username': '***',
            'password': '***',
            'appid': 'otn'
        }
        response = self.session.post(self.user_login_url, data=data, headers=self.headers)
        print(response.text)

    def run(self):  # 主要逻辑实现
        # 下载图片验证码
        self.load_verify_image()
        # 显示下载的图片
        self.show_image()
        # 验证图片验证码
        self.verify_image_code()
        # 登录
        self.user_login()


railway = Login()
railway.run()
相关标签: spider