package com.ruoyi.system.utils; import com.aliyun.ocr_api20210707.Client; import com.aliyun.ocr_api20210707.models.RecognizeAllTextRequest; import com.aliyun.ocr_api20210707.models.RecognizeAllTextResponse; import com.aliyun.ocr_api20210707.models.RecognizeHandwritingRequest; import com.aliyun.ocr_api20210707.models.RecognizeHandwritingResponse; import com.aliyun.tea.TeaException; import com.aliyun.teaopenapi.models.Config; import com.aliyun.teautil.models.RuntimeOptions; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import com.ruoyi.system.config.OCRConfig; import lombok.extern.slf4j.Slf4j; import org.apache.commons.codec.binary.Base64; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.Map; /** * 阿里云OCR工具类 - 通用文字识别 * 支持多种识别类型:通用文字、发票、身份证、手写体等 * 使用 ocr-api20210707 SDK * * 使用示例: * // 通用文字识别 * JSONObject result = AliOCRUtil.recognizeGeneral("https://example.com/image.jpg"); * * // 本地图片识别 * File imageFile = new File("path/to/image.jpg"); * JSONObject result = AliOCRUtil.recognizeGeneral(imageFile); * * // 手写体识别 * JSONObject result = AliOCRUtil.recognizeHandwriting(imageFile); * * // 提取关键字段 * Map fields = AliOCRUtil.extractTargetFields(result); */ @Component public class AliOCRUtil { private static final Logger log = LoggerFactory.getLogger(AliOCRUtil.class); private static OCRConfig staticOcrConfig; @Autowired public void setOcrConfig(OCRConfig ocrConfig) { AliOCRUtil.staticOcrConfig = ocrConfig; } // OCR API 端点 private static final String ENDPOINT = "ocr-api.cn-hangzhou.aliyuncs.com"; /** * 创建阿里云OCR客户端 * @return OCR客户端实例 * @throws Exception 创建失败时抛出异常 */ private static Client createClient() throws Exception { Config config = new Config() .setAccessKeyId(staticOcrConfig.getAccessKeyId()) .setAccessKeySecret(staticOcrConfig.getAccessKeySecret()); config.endpoint = ENDPOINT; // 设置连接超时时间 config.connectTimeout = 10000; // 10秒 config.readTimeout = 30000; // 30秒 return new Client(config); } /** * 创建阿里云OCR客户端(支持外部传入AK/SK) * @param accessKeyId 阿里云AccessKey ID * @param accessKeySecret 阿里云AccessKey Secret * @return OCR客户端实例 * @throws Exception 创建失败时抛出异常 */ public static Client createClient(String accessKeyId, String accessKeySecret) throws Exception { Config config = new Config() .setAccessKeyId(accessKeyId) .setAccessKeySecret(accessKeySecret); config.endpoint = ENDPOINT; // 设置连接超时时间 config.connectTimeout = 10000; // 10秒 config.readTimeout = 30000; // 30秒 return new Client(config); } /** * 将图片文件转为Base64编码 * @param imageFile 图片文件 * @return Base64编码字符串 * @throws IOException 读取文件失败 */ public static String imageToBase64(File imageFile) throws IOException { try (FileInputStream fis = new FileInputStream(imageFile)) { byte[] buffer = new byte[(int) imageFile.length()]; fis.read(buffer); return Base64.encodeBase64String(buffer); } } /** * 调用阿里云通用文字识别OCR接口(使用图片URL) * @param imageUrl 图片URL地址 * @param type 识别类型(如:"Invoice"-发票, "IdCard"-身份证, "General"-通用, "HandWriting"-手写体) * @return OCR识别结果(JSON格式) */ public static JSONObject recognizeTextByUrl(String imageUrl, String type) { try { Client client = createClient(); RecognizeAllTextRequest request = new RecognizeAllTextRequest() .setUrl(imageUrl) .setType(type); RuntimeOptions runtime = new RuntimeOptions(); RecognizeAllTextResponse response = client.recognizeAllTextWithOptions(request, runtime); // 将响应转为JSONObject String responseJson = com.aliyun.teautil.Common.toJSONString(response.body.data); JSONObject result = JSON.parseObject(responseJson); result.put("success", true); log.info("OCR识别成功,类型: {}, URL: {}", type, imageUrl); return result; } catch (TeaException error) { log.error("OCR识别失败(TeaException): {}", error.getMessage(), error); JSONObject errorResult = new JSONObject(); errorResult.put("success", false); errorResult.put("error", error.getMessage()); if (error.getData() != null) { errorResult.put("recommend", error.getData().get("Recommend")); } return errorResult; } catch (Exception error) { log.error("OCR识别失败(Exception): {}", error.getMessage(), error); // 构建详细的错误信息 JSONObject errorResult = new JSONObject(); errorResult.put("success", false); // 判断错误类型 String errorMsg = error.getMessage(); if (errorMsg != null && errorMsg.contains("ocr-api.cn-hangzhou.aliyuncs.com")) { errorResult.put("error", "网络连接失败:无法访问阿里云OCR服务"); errorResult.put("detail", "请检查:1) 网络连接是否正常 2) DNS解析是否可用 3) 防火墙设置 4) 代理配置"); errorResult.put("endpoint", ENDPOINT); } else if (error instanceof java.io.FileNotFoundException) { errorResult.put("error", "文件不存在: " + imageUrl); } else if (error instanceof java.io.IOException) { errorResult.put("error", "文件读取失败: " + errorMsg); } else { errorResult.put("error", errorMsg != null ? errorMsg : "未知错误"); } return errorResult; } } /** * 调用阿里云通用文字识别OCR接口(使用本地图片文件) * @param imageFile 图片文件 * @param type 识别类型(如:"Invoice"-发票, "IdCard"-身份证, "General"-通用, "HandWriting"-手写体) * @return OCR识别结果(JSON格式) */ public static JSONObject recognizeTextByFile(File imageFile, String type) { FileInputStream fis = null; try { // 直接读取图片文件为字节流 fis = new FileInputStream(imageFile); Client client = createClient(); RecognizeAllTextRequest request = new RecognizeAllTextRequest() .setBody(fis) // 直接传入文件流 .setType(type); JSONObject result; RuntimeOptions runtime = new RuntimeOptions(); if(type.equals("HandWriting")){//处理手写{ RecognizeHandwritingRequest handwritingRequest=new RecognizeHandwritingRequest(); handwritingRequest.setBody(fis); handwritingRequest.setNeedSortPage(true);//从上到下,从左到右 RecognizeHandwritingResponse response = client.recognizeHandwriting(handwritingRequest); String responseJson = com.aliyun.teautil.Common.toJSONString(response.body.data); result = JSON.parseObject(responseJson); result.put("success", true); }else { RecognizeAllTextResponse response = client.recognizeAllTextWithOptions(request, runtime); // 将响应转为JSONObject String responseJson = com.aliyun.teautil.Common.toJSONString(response.body.data); result = JSON.parseObject(responseJson); result.put("success", true); } log.info("OCR识别成功,类型: {}, 文件: {} 结果:{}", type, imageFile.getName(),result); return result; } catch (TeaException error) { log.error("OCR识别失败(TeaException): {}", error.getMessage(), error); JSONObject errorResult = new JSONObject(); errorResult.put("success", false); errorResult.put("error", error.getMessage()); if (error.getData() != null) { errorResult.put("recommend", error.getData().get("Recommend")); } return errorResult; } catch (Exception error) { log.error("OCR识别失败(Exception): {}", error.getMessage(), error); // 构建详细的错误信息 JSONObject errorResult = new JSONObject(); errorResult.put("success", false); // 判断错误类型 String errorMsg = error.getMessage(); if (errorMsg != null && errorMsg.contains("ocr-api.cn-hangzhou.aliyuncs.com")) { errorResult.put("error", "网络连接失败:无法访问阿里云OCR服务"); errorResult.put("detail", "请检查:1) 网络连接是否正常 2) DNS解析是否可用 3) 防火墙设置 4) 代理配置"); errorResult.put("endpoint", ENDPOINT); } else if (error instanceof java.io.FileNotFoundException) { errorResult.put("error", "文件不存在: " + imageFile.getAbsolutePath()); } else if (error instanceof java.io.IOException) { errorResult.put("error", "文件读取失败: " + errorMsg); } else { errorResult.put("error", errorMsg != null ? errorMsg : "未知错误"); } return errorResult; } finally { // 关闭文件流 if (fis != null) { try { fis.close(); } catch (IOException e) { log.error("关闭文件流失败", e); } } } } /** * 识别类型枚举 */ public enum OcrType { GENERAL("General", "通用文字识别"), INVOICE("Invoice", "发票识别"), IDCARD("IdCard", "身份证识别"), HANDWRITING("HandWriting", "手写体识别"); private final String code; private final String desc; OcrType(String code, String desc) { this.code = code; this.desc = desc; } public String getCode() { return code; } public String getDesc() { return desc; } } /** * 通用文字识别 - 手写体识别 * @param imageUrl 图片URL * @return OCR识别结果 */ public static JSONObject recognizeHandwriting(String imageUrl) { return recognizeTextByUrl(imageUrl, "HandWriting"); } /** * 通用文字识别 - 手写体识别 - 本地文件 * @param imageFile 图片文件 * @return OCR识别结果 */ public static JSONObject recognizeHandwriting(File imageFile) { return recognizeTextByFile(imageFile, "HandWriting"); } /** * 通用文字识别 * @param imageUrl 图片URL * @return OCR识别结果 */ public static JSONObject recognizeGeneral(String imageUrl) { return recognizeTextByUrl(imageUrl, "General"); } /** * 通用文字识别 - 本地文件 * @param imageFile 图片文件 * @return OCR识别结果 */ public static JSONObject recognizeGeneral(File imageFile) { return recognizeTextByFile(imageFile, "General"); } /** * 识别通用票据(发票、收据等) * @param imageUrl 图片URL * @return OCR识别结果 */ public static JSONObject recognizeInvoice(String imageUrl) { return recognizeTextByUrl(imageUrl, "Invoice"); } /** * 识别通用票据(发票、收据等)- 本地文件 * @param imageFile 图片文件 * @return OCR识别结果 */ public static JSONObject recognizeInvoice(File imageFile) { return recognizeTextByFile(imageFile, "Invoice"); } /** * 识别身份证 * @param imageUrl 图片URL * @return OCR识别结果 */ public static JSONObject recognizeIdCard(String imageUrl) { return recognizeTextByUrl(imageUrl, "IdCard"); } /** * 识别身份证 - 本地文件 * @param imageFile 图片文件 * @return OCR识别结果 */ public static JSONObject recognizeIdCard(File imageFile) { return recognizeTextByFile(imageFile, "IdCard"); } /** * 从OCR结果中提取目标字段(金额、日期、备注等) * @param ocrResult OCR识别的原始结果 * @return 提取后的目标字段 */ public static Map extractTargetFields(JSONObject ocrResult) { Map extracted = new HashMap<>(); // 校验OCR结果是否有效 if (!ocrResult.containsKey("success") || !ocrResult.getBooleanValue("success")) { extracted.put("error", ocrResult.getString("error")); return extracted; } // 获取识别的文字内容 if (!ocrResult.containsKey("content")) { extracted.put("error", "OCR识别结果为空"); return extracted; } String content = ocrResult.getString("content"); // 如果有结构化的prism_wordsInfo字段,优先使用 if (ocrResult.containsKey("prism_wordsInfo")) { JSONArray wordsInfo = ocrResult.getJSONArray("prism_wordsInfo"); // 提取金额(匹配包含"金额""合计""¥"的字段) for (int i = 0; i < wordsInfo.size(); i++) { JSONObject word = wordsInfo.getJSONObject(i); String text = word.getString("word"); if (text.contains("金额") || text.contains("合计") || text.contains("¥")) { extracted.put("totalAmount", text); break; } } // 提取日期 for (int i = 0; i < wordsInfo.size(); i++) { JSONObject word = wordsInfo.getJSONObject(i); String text = word.getString("word"); if (text.contains("日期") || text.matches(".*\\d{4}[-/年]\\d{1,2}[-/月]\\d{1,2}.*")) { extracted.put("date", text); break; } } // 提取备注 for (int i = 0; i < wordsInfo.size(); i++) { JSONObject word = wordsInfo.getJSONObject(i); String text = word.getString("word"); if (text.contains("备注")) { extracted.put("remark", text); break; } } } else { // 使用整体文本内容进行简单提取 extracted.put("fullText", content); } return extracted; } /** * 使用自定义AccessKey进行OCR识别 * @param imageUrl 图片URL * @param type 识别类型 * @param accessKeyId AccessKey ID * @param accessKeySecret AccessKey Secret * @return OCR识别结果 */ public static JSONObject recognizeTextWithCredentials(String imageUrl, String type, String accessKeyId, String accessKeySecret) { try { Client client = createClient(accessKeyId, accessKeySecret); RecognizeAllTextRequest request = new RecognizeAllTextRequest() .setUrl(imageUrl) .setType(type); RuntimeOptions runtime = new RuntimeOptions(); RecognizeAllTextResponse response = client.recognizeAllTextWithOptions(request, runtime); String responseJson = com.aliyun.teautil.Common.toJSONString(response.body.data); JSONObject result = JSON.parseObject(responseJson); result.put("success", true); log.info("OCR识别成功(自定义AK),类型: {}", type); return result; } catch (TeaException error) { log.error("OCR识别失败(TeaException): {}", error.getMessage(), error); JSONObject errorResult = new JSONObject(); errorResult.put("success", false); errorResult.put("error", error.getMessage()); if (error.getData() != null) { errorResult.put("recommend", error.getData().get("Recommend")); } return errorResult; } catch (Exception error) { log.error("OCR识别失败(Exception): {}", error.getMessage(), error); JSONObject errorResult = new JSONObject(); errorResult.put("success", false); errorResult.put("error", error.getMessage()); return errorResult; } } }