验证码识别-Java版
程序员文章站
2022-07-03 19:16:57
前段时间用Java写了个爬虫爬教务处网站,于是有朋友问我是怎么实现验证码识别的,在此将这个小方法分享出来!...
前段时间用Java写了个爬虫爬教务处网站,于是有朋友问我是怎么实现验证码识别的,在此将这个小方法分享出来!
PS:Java也是可以做爬虫的哦~因为对Java熟悉些,所以就用Java写的,打成jar包放服务器,写个cron定时启动也是方便的很呢!
正文前述:关于验证码识别的算法,我之前也了解过一些,现在一般用卷积神经网络来做。虽然GitHub上已经有很多现成的算法,但这些模型都面临同一个问题:需要自己训练。我们只是拿来应用一下,没必要做这么多无关的工作,而且训练初期的识别率还不高。所以我就去各大云市场找了一些验证码识别的API,最后在腾讯云找到一个比较好的接口,价格也算合理:1块钱100次,对自己做爬虫来说完全够用了。
本文不是做验证码识别算法,仅仅是一个应用和工具集的封装。方便你我他!
0x01.API的购买
- 在腾讯云中搜索验证码识别:
- 链接:https://market.cloud.tencent.com/products/21094
- 购买后可以获得secretId和secretKey。
0x02.工具集的封装
- 参考官方给的调用案例,并且这个接口需要的是图片的base64编码,所以对这些操作做了一些封装。
1.Base64工具集
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import sun.misc.BASE64Decoder;
import sun.misc.BASE64Encoder;
/**
 * Helpers for converting images to and from Base64 text.
 *
 * <p>Encoding and decoding go through {@code java.util.Base64}, referenced by its
 * fully qualified name so this class does not rely on the JDK-internal
 * {@code sun.misc} codecs (not accessible on JDK 9 and later). Encoded output is a
 * single line: no line separators are inserted.
 */
public class Base64Utils {

    /** Connect and read timeout, in milliseconds, used when downloading an image. */
    private static final int TIMEOUT_MILLIS = 5000;

    /** Buffer size, in bytes, used when draining a stream. */
    private static final int BUFFER_SIZE = 4096;

    public static void main(String[] args) throws Exception {
        // Address of an online image; fill in before running.
        // For a file on disk, call ImageToBase64ByLocal with its path instead.
        String onlineUrl = "";
        String imgBase64 = Base64Utils.ImageToBase64ByOnline(onlineUrl);
        System.out.println(imgBase64);
    }

    /**
     * Reads a local image file and returns its content as a Base64 string.
     *
     * @param imgFile path of the image on the local file system
     * @return the Base64 encoding of the whole file, without line breaks
     * @throws java.io.UncheckedIOException if the file cannot be opened or read
     */
    public static String ImageToBase64ByLocal(String imgFile) {
        try (InputStream in = new FileInputStream(imgFile)) {
            return encode(readFully(in));
        } catch (IOException e) {
            // Fail with the real cause instead of encoding a null buffer.
            throw new java.io.UncheckedIOException("Cannot read image file: " + imgFile, e);
        }
    }

    /**
     * Downloads an image over HTTP(S) with a GET request and returns its content
     * as a Base64 string.
     *
     * @param imgURL address of the image
     * @return the Base64 encoding of the downloaded bytes, without line breaks;
     *         an empty string if the address is invalid or the download fails
     *         (the failure is printed to standard error)
     */
    public static String ImageToBase64ByOnline(String imgURL) {
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(imgURL).openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a stalled server would block the caller forever.
            conn.setReadTimeout(TIMEOUT_MILLIS);
            try (InputStream is = conn.getInputStream()) {
                return encode(readFully(is));
            }
        } catch (IOException e) {
            e.printStackTrace();
            // Never hand back a truncated image: a partial download yields "".
            return "";
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Decodes a Base64 string and writes the resulting bytes to a file.
     *
     * <p>Whitespace (for example line breaks of wrapped Base64) is ignored. The
     * text is decoded before the target file is opened, so invalid input never
     * creates or truncates the file.
     *
     * @param imgStr Base64 text of the image
     * @param imgFilePath path of the file to write
     * @return {@code true} if the file was written; {@code false} if
     *         {@code imgStr} is null, blank or not valid Base64, or if the file
     *         cannot be written
     */
    public static boolean Base64ToImage(String imgStr, String imgFilePath) {
        if (imgStr == null) {
            return false;
        }
        String compact = imgStr.replaceAll("\\s", "");
        if (compact.isEmpty()) {
            return false;
        }
        try {
            byte[] bytes = java.util.Base64.getDecoder().decode(compact);
            try (OutputStream out = new FileOutputStream(imgFilePath)) {
                out.write(bytes);
            }
            return true;
        } catch (IOException | RuntimeException e) {
            // Covers invalid Base64 (IllegalArgumentException) and a null path.
            return false;
        }
    }

    /** Encodes bytes as single-line Base64 text. */
    private static String encode(byte[] bytes) {
        return java.util.Base64.getEncoder().encodeToString(bytes);
    }

    /** Drains the stream to its end and returns everything that was read. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] buffer = new byte[BUFFER_SIZE];
        int len;
        while ((len = in.read(buffer)) != -1) {
            collected.write(buffer, 0, len);
        }
        return collected.toByteArray();
    }
}
2.验证码接口调用
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.security.InvalidKeyException;
import java.security.Key;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import sun.misc.BASE64Encoder;
/**
 * Client for the captcha-recognition HTTP API: signs the request with
 * HMAC-SHA1, posts the Base64 image as a form field and returns the raw
 * response text.
 */
public class VcodeIdentifi {
    // Secret id issued by the cloud marketplace.
    public static final String secretId = "";
    // Secret key issued by the cloud marketplace.
    public static final String secretKey = "";
    public static final String source = "market";
    // Number of characters in the captcha.
    public static final String number = "4";
    // ne: letters and digits, dn: digits only, de: letters only.
    public static final String pri_id = "ne";

    // NOTE(review): plain HTTP endpoint; confirm whether the gateway also serves HTTPS.
    private static final String API_URL =
            "http://service-98wvmcga-1256810135.ap-guangzhou.apigateway.myqcloud.com/release/yzm";

    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Sends a captcha image to the recognition service.
     *
     * @param IMG_BASE64 Base64 text of the captcha image
     * @return the response body with line terminators removed, or {@code null}
     *         if {@code IMG_BASE64} is null, the request cannot be signed, or the
     *         HTTP exchange fails (failures are printed to standard error)
     */
    public static String VcodeIdentification(String IMG_BASE64) {
        if (IMG_BASE64 == null) {
            return null;
        }

        String datetime = currentGmtDate();
        String auth;
        try {
            auth = calcAuthorization(source, secretId, secretKey, datetime);
        } catch (NoSuchAlgorithmException | UnsupportedEncodingException | InvalidKeyException e) {
            e.printStackTrace();
            // An unsigned request cannot succeed, so do not send it at all.
            return null;
        }

        // Request headers.
        Map<String, String> headers = new HashMap<String, String>();
        headers.put("X-Source", source);
        headers.put("X-Date", datetime);
        headers.put("Authorization", auth);

        // Form fields of the request body.
        Map<String, String> bodyParams = new HashMap<String, String>();
        bodyParams.put("number", number);
        bodyParams.put("pri_id", pri_id);
        bodyParams.put("v_pic", IMG_BASE64);

        HttpURLConnection conn = null;
        try {
            // Encode the form before connecting so an encoding failure costs no request.
            String form = urlencode(bodyParams);

            conn = (HttpURLConnection) new URL(API_URL).openConnection();
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.setRequestMethod("POST");
            for (Map.Entry<String, String> header : headers.entrySet()) {
                conn.setRequestProperty(header.getKey(), header.getValue());
            }
            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            conn.setDoOutput(true);

            try (DataOutputStream out = new DataOutputStream(conn.getOutputStream())) {
                // URL-encoded text is pure ASCII, so one byte per char is lossless.
                out.writeBytes(form);
                out.flush();
            }

            // Assumes the service answers in UTF-8 (the JSON default) - TODO confirm.
            StringBuilder response = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    response.append(line);
                }
            }
            return response.toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Builds the {@code Authorization} header value: an HMAC-SHA1 signature over
     * the {@code x-date} and {@code x-source} lines, Base64 encoded.
     *
     * @param source value of the {@code X-Source} header
     * @param secretId id placed in the {@code id} attribute of the header value
     * @param secretKey key used for the HMAC; {@code SecretKeySpec} rejects an
     *        empty key with {@code IllegalArgumentException}
     * @param datetime value of the {@code X-Date} header
     * @return the complete header value
     * @throws NoSuchAlgorithmException if HmacSHA1 is not available
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws InvalidKeyException if the key is not suitable for the MAC
     */
    public static String calcAuthorization(String source, String secretId, String secretKey, String datetime)
            throws NoSuchAlgorithmException, UnsupportedEncodingException, InvalidKeyException {
        String signStr = "x-date: " + datetime + "\n" + "x-source: " + source;
        Mac mac = Mac.getInstance("HmacSHA1");
        Key sKey = new SecretKeySpec(secretKey.getBytes("UTF-8"), mac.getAlgorithm());
        mac.init(sKey);
        byte[] hash = mac.doFinal(signStr.getBytes("UTF-8"));
        // Fully qualified so the signature does not depend on the sun.misc encoder.
        String sig = java.util.Base64.getEncoder().encodeToString(hash);
        return "hmac id=\"" + secretId + "\", algorithm=\"hmac-sha1\", headers=\"x-date x-source\", signature=\""
                + sig + "\"";
    }

    /**
     * Encodes a map as {@code key=value} pairs joined by {@code &}, with keys and
     * values URL-encoded as UTF-8. Pairs appear in the map's iteration order.
     *
     * @param map parameters to encode; keys and values must not be null
     * @return the encoded string, empty for an empty map
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    public static String urlencode(Map<?, ?> map) throws UnsupportedEncodingException {
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            if (sb.length() > 0) {
                sb.append('&');
            }
            sb.append(URLEncoder.encode(entry.getKey().toString(), "UTF-8"))
                    .append('=')
                    .append(URLEncoder.encode(entry.getValue().toString(), "UTF-8"));
        }
        return sb.toString();
    }

    /** Formats the current time as an HTTP date in GMT, e.g. "Mon, 04 Jul 2022 10:00:00 GMT". */
    private static String currentGmtDate() {
        // A fresh formatter per call: SimpleDateFormat is not thread-safe.
        SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US);
        sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
        return sdf.format(Calendar.getInstance().getTime());
    }
}
3.测试
/**
 * Demo entry point: encodes a local captcha image as Base64, submits it to the
 * recognition service and prints the service's reply.
 */
public class Main {
    public static void main(String[] args) {
        // To recognise an online captcha instead, build the payload with
        // Base64Utils.ImageToBase64ByOnline("http://*.edu.cn/validateCodeAction.do?random=0.41074290098962263").
        final String captchaPath = "D:\\DeskTop\\验证码识别\\code2.jpg";
        final String encodedCaptcha = Base64Utils.ImageToBase64ByLocal(captchaPath);
        System.out.println(VcodeIdentifi.VcodeIdentification(encodedCaptcha));
    }
}
- 返回的是json字符串,后续还可以根据需求做进一步的结果处理。
本文地址:https://blog.csdn.net/ATFWUS/article/details/112858757