python简单验证码识别的实现方法

程序员文章站 2023-01-10 21:08:14

利用svm向量机进行4位数字验证码识别主要是思路和步骤如下：一，素材收集检查环境是否包...

利用svm向量机进行4位数字验证码识别

主要是思路和步骤如下：

一，素材收集

检查环境是否包含有相应的库：

1.在cmd中，通过 pip list命令查看安装的库

2.再使用pip installrequests 安装requests库

3.再次使用pip list 命令

python简单验证码识别的实现方法

4.利用python获取验证码资源

编写代码：_downloadpic.py

#!/usr/bin/nev python3
#利用python从站点下载验证码图片

import requests

## 1.在 http://www.xxx.com
# 获取验证码url
def downloads_pic(strpath, strname):
 #设置url
 url = 'http://www.xxx.com'

 #以二进制方式发送get请求，
 #将stream = true，
 #数据读取完成前不要断开链接
 rreq = requests.get(url, stream = true)

 #尝试保存图片
 with open(strpath + strname + '.png', 'wb') as fppic:
  #循环读取1024byte到bychunk中，读完则跳出
  for bychunk in rreq.iter_content(chunk_size = 1024):
   if bychunk:
    fppic.write(bychunk)
    fppic.flush()
  fppic.close()

for i in range(1, 10 + 1):
 strfilename = "%03d" % i
 downloads_pic('d:/1/', strfilename)

二，素材处理

1.二值化处理，增加对比度，锐化，增加亮度，滤镜，转为黑白

2.去除噪点

3.切割图片

python简单验证码识别的实现方法

编写代码：_picdealwith.py

#!/usr/bin/env python3
import os
import os.path
from pil import image, imageenhance, imagefilter
import random

#二值化处理
#strimgpath 图片路径
def binaryzationimg(strimgpath):
 #打开图片
 imgoriimg = image.open(strimgpath)

 #增加对比度
 pocenhance = imageenhance.contrast(imgoriimg)
 #增加255%对比度
 imgoriimg = pocenhance.enhance(2.55)

 #锐化
 pocenhance = imageenhance.sharpness(imgoriimg)
 #锐化200%
 imgoriimg = pocenhance.enhance(2.0)

 #增加亮度
 pocenhance = imageenhance.brightness(imgoriimg)
 #增加200%
 imgoriimg = pocenhance.enhance(2.0)

 #添加滤镜效果
 imggryimg = imgoriimg.convert('l').filter(imagefilter.detail)

 #二值化处理
 imgbinimg = imggryimg.convert('1')

 return imgbinimg

#去除噪点
def clearnoise(imgbinimg):
 for x in range(1, (imgbinimg.size[0]-1)):
  for y in range(1,(imgbinimg.size[1] - 1)):
  #一个点为黑色，周围8个点为白色，则此点为噪点，设置为白色
   if imgbinimg.getpixel((x, y)) == 0 \
    and imgbinimg.getpixel(((x - 1), (y + 1))) == 255 \
    and imgbinimg.getpixel(((x - 1), y)) == 255 \
    and imgbinimg.getpixel(((x - 1), (y - 1))) == 255 \
    and imgbinimg.getpixel(((x + 1), (y + 1))) == 255 \
    and imgbinimg.getpixel(((x + 1), y)) == 255 \
    and imgbinimg.getpixel(((x + 1), (y - 1))) == 255 \
    and imgbinimg.getpixel((x, (y + 1))) == 255 \
    and imgbinimg.getpixel((x, (y - 1))) == 255:
    imgbinimg.putpixel([x, y], 255)

 return imgbinimg

#切割图片
def getcropimgs(imgclrimg):
 imglist = []
 for i in range(4):
  x = 6 + i * 13
  y = 3
  subimg = imgclrimg.crop((x, y, x + 13, y + 15))
  imglist.append(subimg)
 return imglist


#调用部分
def main():
 g_count = 0
 strstep1dir = 'd:/1/step1/'
 strstep2dir = 'd:/1/step2/'
 for parentpath, dirname, filenames in os.walk(strstep1dir):
  for i in filenames:
   #图片文件路径信息
   strfullpath = os.path.join(parentpath, i)
   imgbinimg = binaryzationimg(strfullpath)
   imgclrimg = clearnoise(imgbinimg)
   imglist = getcropimgs(imgclrimg)
   for img in imglist:
    strimgname = "%04d%04d.png" % (g_count, random.randint(0, 9999))
    strimgpath = os.path.join(strstep2dir, strimgname)
    img.save(strimgpath)
    g_count += 1

 print("ok！")

if __name__ == '__mian__':
 main()

三，手工分类

将第二步切割好的图片进行分类，体力活

python简单验证码识别的实现方法

四，利用svm向量机建立模型

1.安装svm库

下载libsvm库，并解压

将库中的windows目录的路径添加到path环境变量中

将libsvm下的python文件夹中的svm.py和svmutil.py文件拷贝到你的python的路径中lib文件夹中

from svmutil import *

2.生成模型文件

2.1.将分好类的图片信息进行提取，生成特征值

2.2.输出向量数据

python简单验证码识别的实现方法

2.3.根据数据输出svm模型文件

python简单验证码识别的实现方法

编写代码：_svmdemo.py

#!/usr/bin/env python3

#svm,验证码识别

import os
import sys
import random
import os.path
from pil import image, imageenhance, imagefilter
from svmutil import *

##记录像素点的值，描述特征，采用遍历每个像素点统计黑色点的数量
def getfeature(imgcropimg, nimgheight, nimgwidth):
 pixelcountlist = []
 for y in range(nimgheight):
  countx = 0
  for x in range(nimgwidth):
   if imgcropimg.getpixel((x, y)) == 0:
    countx += 1
  pixelcountlist.append(countx)
 for x in range(nimgwidth):
  county = 0
  for y in range(nimgheight):
   if imgcropimg.getpixel((x, y)) == 0:
    county += 1
  pixelcountlist.append(county)
 return pixelcountlist

##输出向量数据
def outputvectordata(strid, strmaterialdir, stroutpath):
 for parentpath, dirnames, filenames in os.walk(strmaterialdir):
  with open(stroutpath, 'a') as fpfea:
   for fp in filenames:
    #图片文件路径信息
    strfullpath = os.path.join(parentpath, fp)

    #打开图片
    imgoriimg = image.open(strfullpath)

    #生成特征值
    featurelist = getfeature(imgoriimg, 15, 13)

    strfeature = strid + ' '
    ncount = 1
    for i in featurelist:
     strfeature = '%s%d:%d ' % (strfeature, ncount, i)
     ncount += 1
    fpfea.write(strfeature + '\n')
    fpfea.flush()
  fpfea.close()

#训练svm模型
def trainsvmmodel(strproblempath, strmodelpath):
 y, x = svm_read_problem(strproblempath)
 model = svm_train(y, x)
 svm_save_model(strmodelpath, model)

#svm模型测试
def svmmodeltest(strproblempath, strmodelpath):
 testy, testx = svm_read_problem(strproblempath)
 model = svm_load_model(strmodelpath)
 #返回识别结果
 plabel, pacc, pval = svm_predict(testy, testx, model)
 return plabel


##输出测试向量数据
def outputtestvectordata(strid, strdir, stroutpath):
 filelist = []
 for parentpath, strdir, filename in os.walk(strdir):
  filelist = filename
 with open(stroutpath, 'a') as fpfea:
  for fp in filelist:
   #图片文件路径信息
   strfullpath = os.path.join(parentpath, fp)

   #打开图片
   imgoriimg = image.open(strfullpath)

   #生成特征值
   featurelist = getfeature(imgoriimg, 15, 13)

   strfeature = strid + ' '
   ncount = 1
   for i in featurelist:
    strfeature = '%s%d:%d ' % (strfeature, ncount, i)
    ncount += 1
   fpfea.write(strfeature + '\n')
   fpfea.flush()
  fpfea.close()


def main():
# 1.循环输出向量文件
 for i in range(0, 10):
  strid = '%d' % i
  outputvectordata(strid, 'd:/1/step3/' + strid, 'd:/1/step4/vector.txt')

# 2.调用函数训练svm模型
 trainsvmmodel('d:/1/step4/vector.txt', 'd:/1/step5/model.txt')
# 3.调用函数识别结果
 plabel = svmmodeltest('d:/1/step6/vector.txt', 'd:/1/step5/model.txt')
 for i in plabel:
  print('%d' % i)



if __name__ == '__main__':
 main()

五，测试

1.利用模型文件和向量文件进行测试验证码识别

##1.获取一张验证码图片

##2.对图片进行处理

## 2.1.二值化处理，增加对比度，锐化，增加亮度，滤镜，转为黑白，

## 2.2.去除噪点

## 2.3.切割图片

##3.生成向量文件

##4.再利用之前的模型文件进行识别测试

编写代码：_svmtest.py

#!/usr/bin/env python3
#对一张验证码图片进行识别测试

##1.获取一张验证码图片
##2.对图片进行处理
## 2.1.二值化处理，增加对比度，锐化，增加亮度，滤镜，转为黑白，
## 2.2.去除噪点
## 2.3.切割图片
##3.生成向量文件
##4.再利用之前的模型文件进行识别测试

################
import _picdealwith
import os
import random
import _svmdemo


##测试
g_count = 0
strdirpath = 'd:/1/test/'
strfilename = '001.png'
#1.图片文件路径信息
strfullpath = os.path.join(strdirpath, strfilename)
#2.对图片进行处理
#2.1二值化处理
imgbinimg = _picdealwith.binaryzationimg(strfullpath)
#2.2去除噪点
imgclrimg = _picdealwith.clearnoise(imgbinimg)
#2.3切割图片
imglist = _picdealwith.getcropimgs(imgclrimg)
#2.3循环写入文件
for img in imglist:
 strimgname = "%04d%04d.png" % (g_count, random.randint(0, 9999))
 strimgpath = os.path.join(strdirpath, strimgname)
 img.save(strimgpath)
 g_count += 1

print("ok")

os.remove(strfullpath)

#3.生成向量文件
_svmdemo.outputtestvectordata('0', 'd:/1/test/', 'd:/1/test/vector.txt')

#4.利用之前的模型文件进行识别测试
plabel = _svmdemo.svmmodeltest('d:/1/test/vector.txt', 'd:/1/step5/model.txt')
for i in plabel:
 print('%d' % i, end = '')

效果图:

python简单验证码识别的实现方法

总结

以上就是这篇文章的全部内容了，希望本文的内容对大家的学习或者工作具有一定的参考学习价值，谢谢大家对的支持。

上一篇： php字符串截取问题

下一篇： php类

python简单验证码识别的实现方法

Python实现一个简单的验证码程序

Python实现简单状态框架的方法

Python使用shelve模块实现简单数据存储的方法

Python实现简单字典树的方法

python下调用pytesseract识别某网站验证码的实现方法

Android中验证码倒计时的简单实现方法示例

Python单链表的简单实现方法

Python数组遍历的简单实现方法小结

Python简单实现enum功能的方法

Python基于jieba库进行简单分词及词云功能实现方法