首页 > 编程 > Java > 正文

java实现百度云OCR文字识别 高精度OCR识别身份证信息

2019-11-26 09:30:20
字体:
来源:转载
供稿:网友

本文为大家分享了java实现百度云OCR识别的具体代码,高精度OCR识别身份证信息,供大家参考,具体内容如下

1.通用OCR文字识别

这种OCR只能识别图片中的文字,并且是按行返回识别结果,精度较低。

首先引入依赖包:

<dependency>  <groupId>com.baidu.aip</groupId>  <artifactId>java-sdk</artifactId>  <version>4.6.0</version></dependency>

通用OCR工具类:

package util; import com.baidu.aip.ocr.AipOcr;import org.json.JSONObject;import java.util.HashMap;  public class OcrApi {  private static final String APP_ID = "你的 App ID";  private static final String API_KEY = "Xb12m5t4jS2n7";  private static final String SECRET_KEY = "9XVx9GPcSbSUTZ";   private static AipOcr getAipClient() {    return getAipClient(API_KEY, SECRET_KEY);  }   public static AipOcr getAipClient(String apiKey, String secretKey) {    AipOcr client = new AipOcr(APP_ID, apiKey, secretKey);    // 可选:设置网络连接参数    client.setConnectionTimeoutInMillis(2000);    client.setSocketTimeoutInMillis(60000);    return client;  }   public static String result(AipOcr client) {    // 传入可选参数调用接口    HashMap<String, String> options = new HashMap<>();    options.put("language_type", "CHN_ENG");    options.put("detect_direction", "true");    options.put("detect_language", "true");    options.put("probability", "true");     JSONObject res = client.basicGeneralUrl(        "https://lichunyu1234.oss-cn-shanghai.aliyuncs.com/1.png", options);    return res.toString(2);  }   public static void main(String[] args) {    System.out.println(result(getAipClient()));  }}

结果如下,识别有两行信息(words即是识别的信息):

2.高精度OCR识别身份证信息 

这种就比较高精度,且按照分类显示,返回数据更友好,高可用。

2.1 接口说明、请求地址及请求参数的官方截图如下:

2.2 OCR身份证识别工具类

package util; import com.alibaba.druid.util.Base64;import com.alibaba.fastjson.JSONObject;import java.io.*;import java.net.*;import java.nio.charset.StandardCharsets;import java.util.List;import java.util.Map; public class OcrUtil {  // Access_Token获取  private static final String ACCESS_TOKEN_HOST = "https://aip.baidubce.com/oauth/2.0/token?";  // 身份证识别请求URL  private static final String OCR_HOST = "https://aip.baidubce.com/rest/2.0/ocr/v1/idcard?";  // apiKey,secretKey  private static final String API_KEY ="Xb12m5t4jS";  private static final String SECRET_KEY = "9XVx9GPcSbSUT";    // 获取百度云OCR的授权access_token  public static String getAccessToken() {    return getAccessToken(API_KEY, SECRET_KEY);  }   /**   * 获取百度云OCR的授权access_token   * @param apiKey   * @param secretKey   * @return   */  public static String getAccessToken(String apiKey, String secretKey) {    String accessTokenURL = ACCESS_TOKEN_HOST        // 1. grant_type为固定参数        + "grant_type=client_credentials"        // 2. 官网获取的 API Key        + "&client_id=" + apiKey        // 3. 
官网获取的 Secret Key        + "&client_secret=" + secretKey;     try {      URL url = new URL(accessTokenURL);      // 打开和URL之间的连接      HttpURLConnection connection = (HttpURLConnection) url.openConnection();      connection.setRequestMethod("GET");      connection.connect();       // 获取响应头      Map<String, List<String>> map = connection.getHeaderFields();      // 遍历所有的响应头字段      for (String key : map.keySet()) {        System.out.println(key + "---->" + map.get(key));      }       // 定义 BufferedReader输入流来读取URL的响应      BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(connection.getInputStream()));      StringBuilder result = new StringBuilder();      String inputLine;      while ((inputLine = bufferedReader.readLine()) != null) {        result.append(inputLine);      }      JSONObject jsonObject = JSONObject.parseObject(result.toString());      return jsonObject.getString("access_token");     } catch (Exception e) {      e.printStackTrace();      System.err.print("获取access_token失败");    }    return null;  }   /**   * 获取身份证识别后的数据   * @param imageUrl   * @param idCardSide   * @return   */  public static String getStringIdentityCard(File imageUrl, String idCardSide) {    // 身份证OCR的http URL+鉴权token    String OCRUrl = OCR_HOST+"access_token="+getAccessToken();    System.out.println(OCRUrl);    System.out.println("***************************************************");    System.out.println(getAccessToken());    // 对图片进行base64处理    String image = encodeImageToBase64(imageUrl);    // 请求参数    String requestParam = "detect_direction=true&id_card_side="+idCardSide+"&image="+image;     try {      // 请求OCR地址      URL url = new URL(OCRUrl);      HttpURLConnection connection = (HttpURLConnection) url.openConnection();      // 设置请求方法为POST      connection.setRequestMethod("POST");       // 设置请求头      connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");      connection.setRequestProperty("apiKey", API_KEY);      
connection.setDoOutput(true);      connection.getOutputStream().write(requestParam.getBytes(StandardCharsets.UTF_8));      connection.connect();       // 定义 BufferedReader输入流来读取URL的响应      BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8));      StringBuilder result = new StringBuilder();      String inputLine;      while ((inputLine = bufferedReader.readLine()) != null) {        result.append(inputLine);      }      bufferedReader.close();      return result.toString();    } catch (Exception e) {      e.printStackTrace();      System.err.println("身份证OCR识别异常");      return null;    }  }   /**   * 对图片url进行Base64编码处理   * @param imageUrl   * @return   */  public static String encodeImageToBase64(File imageUrl) {    // 将图片文件转化为字节数组字符串,并对其进行Base64编码处理    byte[] data = null;    try {      InputStream inputStream = new FileInputStream(imageUrl);      data = new byte[inputStream.available()];      inputStream.read(data);      inputStream.close();       // 对字节数组Base64编码      return URLEncoder.encode(Base64.byteArrayToBase64(data), "UTF-8");    } catch (Exception e) {      e.printStackTrace();      return null;    }   }   /**   * 提取OCR识别身份证有效信息   * @param   * @return   */  public static Map<String, String> getIdCardInfo(MultipartFile image, int idCardSide) {    String value = getStringIdentityCard(image, idCardSide);    String side;    if (idCardSide == 1) {      side = "正面";    }else {      side = "背面";    }    Map<String, String> map = new HashMap<>();    JSONObject jsonObject = JSONObject.parseObject(value);    JSONObject words_result = jsonObject.getJSONObject("words_result");    if (words_result == null || words_result.isEmpty()) {      throw new MyException("请提供身份证"+side+"图片");    }    for (String key : words_result.keySet()) {      JSONObject result = words_result.getJSONObject(key);      String info = result.getString("words");      switch (key) {        case "姓名":          map.put("name", 
info);          break;        case "性别":          map.put("sex", info);          break;        case "民族":          map.put("nation", info);          break;        case "出生":          map.put("birthday", info);          break;        case "住址":          map.put("address", info);          break;        case "公民身份号码":          map.put("idNumber", info);          break;        case "签发机关":          map.put("issuedOrganization", info);          break;        case "签发日期":          map.put("issuedAt", info);          break;        case "失效日期":          map.put("expiredAt", info);          break;      }    }    return map;   } }

官方返回示例:

对于身份证识别有两个大坑:

1.有的base64编码后有头部“Base64:”要去掉,阿里巴巴的base64可以正常使用。

2.OCR识别官方只说明图片要Base64编码,但实际上还需要对Base64编码后的结果再做一次UrlEncode才可以。

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持武林网。

发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表