java实现百度云OCR文字识别 高精度OCR识别身份证信息
程序员文章站
2024-03-01 09:44:52
本文为大家分享了java实现百度云ocr识别的具体代码,高精度ocr识别身份证信息,供大家参考,具体内容如下
1.通用ocr文字识别
这种ocr只能按照识别图片中的文字...
本文为大家分享了java实现百度云ocr识别的具体代码,高精度ocr识别身份证信息,供大家参考,具体内容如下
1.通用ocr文字识别
这种OCR只能识别图片中的文字,并且是按行返回识别结果,精度较低。
首先引入依赖包:
<!-- Baidu AIP Java SDK; provides the com.baidu.aip.ocr.AipOcr client used by the OCR utility class. -->
<!-- Maven element names are case-sensitive: groupId / artifactId, not groupid / artifactid. -->
<dependency>
    <groupId>com.baidu.aip</groupId>
    <artifactId>java-sdk</artifactId>
    <version>4.6.0</version>
</dependency>
通用OCR识别工具类:
package util;

import com.baidu.aip.ocr.AipOcr;
import org.json.JSONObject;

import java.util.HashMap;

/**
 * Minimal wrapper around the Baidu AIP SDK's general-purpose (basic) OCR endpoint.
 *
 * <p>The general endpoint returns the recognized text line by line; it does not
 * classify the text into fields.
 */
public class OcrApi {

    // NOTE(review): credentials are embedded in source here for demonstration only.
    // Replace them with your own values and load them from configuration or the
    // environment instead of committing them.
    private static final String APP_ID = "你的 app id";
    private static final String API_KEY = "xb12m5t4js2n7";
    private static final String SECRET_KEY = "9xvx9gpcsbsutz";

    /**
     * Builds a client using the API key and secret key configured in this class.
     *
     * @return a configured {@link AipOcr} client
     */
    private static AipOcr getAipClient() {
        return getAipClient(API_KEY, SECRET_KEY);
    }

    /**
     * Builds a client for the given credentials and applies the network timeouts.
     *
     * @param apiKey    the API key issued by the Baidu console
     * @param secretKey the secret key issued by the Baidu console
     * @return a configured {@link AipOcr} client
     */
    public static AipOcr getAipClient(String apiKey, String secretKey) {
        AipOcr client = new AipOcr(APP_ID, apiKey, secretKey);
        // Optional network settings: 2 s to connect, 60 s to wait for the response.
        client.setConnectionTimeoutInMillis(2000);
        client.setSocketTimeoutInMillis(60000);
        return client;
    }

    /**
     * Runs general OCR on a fixed sample image URL and returns the response as
     * pretty-printed JSON (indent of 2).
     *
     * @param client the client to send the request with
     * @return the JSON response rendered as an indented string
     */
    public static String result(AipOcr client) {
        // The SDK method takes a HashMap specifically, so the variable is not
        // declared as the Map interface.
        HashMap<String, String> options = new HashMap<>();
        // Language codes are upper-case in the Baidu API (CHN_ENG = Chinese + English).
        options.put("language_type", "CHN_ENG");
        options.put("detect_direction", "true");
        options.put("detect_language", "true");
        options.put("probability", "true");

        JSONObject res = client.basicGeneralUrl(
                "https://lichunyu1234.oss-cn-shanghai.aliyuncs.com/1.png", options);
        return res.toString(2);
    }

    /**
     * Demo entry point: prints the OCR response for the sample image.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(result(getAipClient()));
    }
}
结果如下,共识别出两行信息(words字段即识别出的文字):
2.高精度ocr识别身份证信息
这种方式精度较高,并且按字段分类返回结果,返回的数据更友好,可用性更高。
2.1 接口说明、请求地址及请求参数的官方截图如下:
2.2 ocr身份证识别工具类
package util;

import com.alibaba.druid.util.Base64;
import com.alibaba.fastjson.JSONObject;
// NOTE(review): MultipartFile was referenced without an import in the original;
// assumed to be Spring's org.springframework.web.multipart.MultipartFile — confirm.
import org.springframework.web.multipart.MultipartFile;

import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Helper for the Baidu Cloud ID-card OCR REST endpoint.
 *
 * <p>Flow: obtain an OAuth access token, Base64-encode and URL-encode the image,
 * POST it as a form body, then map the {@code words_result} entries of the
 * response onto a flat map.
 *
 * <p>NOTE(review): {@code MyException} is not declared or imported in this file;
 * it is presumably an unchecked exception in the same package — confirm.
 */
public class OcrUtil {

    /** OAuth endpoint used to obtain the access token. */
    private static final String ACCESS_TOKEN_HOST = "https://aip.baidubce.com/oauth/2.0/token?";

    /** ID-card recognition endpoint. */
    private static final String OCR_HOST = "https://aip.baidubce.com/rest/2.0/ocr/v1/idcard?";

    // NOTE(review): demonstration credentials embedded in source; load real ones
    // from configuration instead of committing them.
    private static final String API_KEY = "xb12m5t4js";
    private static final String SECRET_KEY = "9xvx9gpcsbsut";

    /**
     * Obtains an access token using the credentials configured in this class.
     *
     * @return the access token, or {@code null} if it could not be obtained
     */
    public static String getAccessToken() {
        return getAccessToken(API_KEY, SECRET_KEY);
    }

    /**
     * Obtains a Baidu Cloud OCR access token for the given credentials.
     *
     * @param apiKey    the API key issued by the Baidu console
     * @param secretKey the secret key issued by the Baidu console
     * @return the {@code access_token} field of the response, or {@code null} on any failure
     */
    public static String getAccessToken(String apiKey, String secretKey) {
        try {
            String utf8 = StandardCharsets.UTF_8.name();
            String accessTokenUrl = ACCESS_TOKEN_HOST
                    // 1. grant_type is a fixed parameter
                    + "grant_type=client_credentials"
                    // 2. API key from the console (encoded so it cannot break the query string)
                    + "&client_id=" + URLEncoder.encode(apiKey, utf8)
                    // 3. secret key from the console
                    + "&client_secret=" + URLEncoder.encode(secretKey, utf8);

            HttpURLConnection connection = (HttpURLConnection) new URL(accessTokenUrl).openConnection();
            // HTTP method names are case-sensitive; a lower-case "get" is rejected
            // with a ProtocolException.
            connection.setRequestMethod("GET");
            connection.connect();

            // Dump every response header to stdout.
            Map<String, List<String>> headers = connection.getHeaderFields();
            for (Map.Entry<String, List<String>> header : headers.entrySet()) {
                System.out.println(header.getKey() + "---->" + header.getValue());
            }

            JSONObject jsonObject = JSONObject.parseObject(readBody(connection));
            return jsonObject.getString("access_token");
        } catch (Exception e) {
            e.printStackTrace();
            System.err.print("获取access_token失败");
        }
        return null;
    }

    /**
     * Sends an image file to the ID-card OCR endpoint and returns the raw response.
     *
     * @param imageUrl   the image file to recognize (a local file, despite the name)
     * @param idCardSide value sent as {@code id_card_side}; the Baidu API documents
     *                   {@code front} and {@code back}
     * @return the response body, or {@code null} if encoding, authorization or the request failed
     */
    public static String getStringIdentityCard(File imageUrl, String idCardSide) {
        return requestIdCardOcr(encodeImageToBase64(imageUrl), idCardSide);
    }

    /**
     * Reads an image file and returns its Base64 representation, URL-encoded so it
     * can be placed directly into a form body.
     *
     * @param imageUrl the image file to encode
     * @return the URL-encoded Base64 text, or {@code null} if the file could not be read
     */
    public static String encodeImageToBase64(File imageUrl) {
        try {
            // readAllBytes reads the entire file and closes it; available() plus a
            // single read() is not guaranteed to do so.
            return encodeBytesToBase64(Files.readAllBytes(imageUrl.toPath()));
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Recognizes an uploaded ID-card image and extracts the useful fields.
     *
     * @param image      the uploaded image
     * @param idCardSide {@code 1} for the front side; any other value is treated as the back side
     * @return a map of the recognized fields; only fields present in the response are included
     * @throws MyException if the request failed or the response contains no {@code words_result}
     */
    public static Map<String, String> getIdCardInfo(MultipartFile image, int idCardSide) {
        boolean front = idCardSide == 1;
        String side = front ? "正面" : "背面";

        String value = null;
        if (image != null) {
            try {
                value = requestIdCardOcr(encodeBytesToBase64(image.getBytes()), front ? "front" : "back");
            } catch (IOException e) {
                // Matches the rest of this class: log and treat as "no result".
                e.printStackTrace();
            }
        }

        // A failed request yields null; report it as a missing result rather than
        // letting it surface as a NullPointerException.
        JSONObject response = value == null ? null : JSONObject.parseObject(value);
        JSONObject wordsResult = response == null ? null : response.getJSONObject("words_result");
        if (wordsResult == null || wordsResult.isEmpty()) {
            throw new MyException("请提供身份证" + side + "图片");
        }

        // NOTE(review): the output keys below are reproduced exactly as they appear
        // in the article; confirm the casing callers expect.
        Map<String, String> map = new HashMap<>();
        for (String key : wordsResult.keySet()) {
            JSONObject result = wordsResult.getJSONObject(key);
            if (result == null) {
                continue;
            }
            String info = result.getString("words");
            switch (key) {
                case "姓名":
                    map.put("name", info);
                    break;
                case "性别":
                    map.put("sex", info);
                    break;
                case "民族":
                    map.put("nation", info);
                    break;
                case "出生":
                    map.put("birthday", info);
                    break;
                case "住址":
                    map.put("address", info);
                    break;
                case "公民身份号码":
                    map.put("idnumber", info);
                    break;
                case "签发机关":
                    map.put("issuedorganization", info);
                    break;
                case "签发日期":
                    map.put("issuedat", info);
                    break;
                case "失效日期":
                    map.put("expiredat", info);
                    break;
                default:
                    // Other fields are not needed.
                    break;
            }
        }
        return map;
    }

    /** Base64-encodes the bytes, then URL-encodes the result for use in a form body. */
    private static String encodeBytesToBase64(byte[] data) throws UnsupportedEncodingException {
        return URLEncoder.encode(Base64.byteArrayToBase64(data), StandardCharsets.UTF_8.name());
    }

    /** POSTs an already-encoded image to the ID-card endpoint; returns the body, or null on failure. */
    private static String requestIdCardOcr(String encodedImage, String idCardSide) {
        if (encodedImage == null) {
            return null;
        }
        // Fetch the token once, and never print it or the URL that carries it.
        String accessToken = getAccessToken();
        if (accessToken == null) {
            return null;
        }
        String ocrUrl = OCR_HOST + "access_token=" + accessToken;
        String requestParam = "detect_direction=true&id_card_side=" + idCardSide + "&image=" + encodedImage;
        try {
            HttpURLConnection connection = (HttpURLConnection) new URL(ocrUrl).openConnection();
            // Upper-case is required; "post" is rejected with a ProtocolException.
            connection.setRequestMethod("POST");
            connection.setRequestProperty("content-type", "application/x-www-form-urlencoded");
            connection.setRequestProperty("apikey", API_KEY);
            connection.setDoOutput(true);
            try (OutputStream out = connection.getOutputStream()) {
                out.write(requestParam.getBytes(StandardCharsets.UTF_8));
            }
            return readBody(connection);
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("身份证ocr识别异常");
            return null;
        }
    }

    /** Reads the whole response body as UTF-8 (line terminators dropped) and closes the stream. */
    private static String readBody(HttpURLConnection connection) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            StringBuilder result = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                result.append(line);
            }
            return result.toString();
        }
    }
}
官方返回示例:
对于身份证识别有个大坑:
1.有的Base64工具编码后的结果带有头部“base64:”,需要先把这个头部去掉;阿里巴巴(druid)提供的Base64工具类可以正常使用。
2.OCR识别的官方文档只说明图片要进行Base64编码,但实际上还需要对Base64编码后的结果再做一次URL编码(urlencode)才可以。
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
下一篇: Java定时任务的三种实现方式