Răsfoiți Sursa

改造OCR识别功能

wukai 1 an în urmă
părinte
comite
c021875ba8

+ 1 - 1
doc-admin/src/main/java/com/doc/web/controller/system/SysLoginController.java

@@ -131,7 +131,7 @@ public class SysLoginController {
     public AjaxResult face(@RequestParam("file") MultipartFile file) throws Exception {
         String originalFilename = file.getOriginalFilename();
         String[] filename = originalFilename.split("\\.");
-        File upFile = File.createTempFile("temp_" + filename[0], filename[1] + ".");
+        File upFile = File.createTempFile("temp_" + filename[0], "." + filename[1]);
         file.transferTo(upFile);
 
         return faceLogin(upFile);

+ 17 - 2
doc-biz/src/main/java/com/doc/biz/controller/ApiController.java

@@ -16,6 +16,7 @@ import org.springframework.web.bind.annotation.*;
 import org.springframework.web.multipart.MultipartFile;
 
 import javax.annotation.Resource;
+import java.util.Map;
 
 /**
  * 文件上传
@@ -92,12 +93,25 @@ public class ApiController extends BaseController {
     @GetMapping("/ocr/{fileId}")
     public AjaxResult ocr(@PathVariable(name = "fileId") String fileId) {
         try {
-            return success(ocrService.recognition(fileId));
+            Map<String, Object> result = ocrService.recognition(fileId);
+            return ocrResult(result);
         } catch (Exception e) {
             return error(e.getMessage());
         }
     }
 
+    /**
+     * 处理json数据返回
+     *
+     * @param result OCR接口返回的识别结果（已解析为键值对Map）
+     * @return 前端所需结果
+     */
+    private AjaxResult ocrResult(Map<String, Object> result) {
+        AjaxResult ajaxResult = success();
+        ajaxResult.putAll(result);
+        return ajaxResult;
+    }
+
 
     /**
      * 文字识别
@@ -109,7 +123,8 @@ public class ApiController extends BaseController {
     @PostMapping("/upload")
     public AjaxResult ocr(@RequestParam(value = "file") MultipartFile file) {
         try {
-            return success(ocrService.recognition(file));
+            Map<String, Object> result = ocrService.recognition(file);
+            return ocrResult(result);
         } catch (Exception e) {
             return error(e.getMessage());
         }

+ 4 - 2
doc-biz/src/main/java/com/doc/biz/service/IOcrService.java

@@ -2,6 +2,8 @@ package com.doc.biz.service;
 
 import org.springframework.web.multipart.MultipartFile;
 
+import java.util.Map;
+
 /**
  * OCR服务
  *
@@ -15,7 +17,7 @@ public interface IOcrService {
      * @return 识别结果
      * @throws Exception 异常
      */
-    String recognition(String fileId) throws Exception;
+    Map<String,Object> recognition(String fileId) throws Exception;
 
     /**
      * 文字识别
@@ -24,5 +26,5 @@ public interface IOcrService {
      * @return 识别结果
      * @throws Exception 异常
      */
-    String recognition(MultipartFile file) throws Exception;
+    Map<String,Object> recognition(MultipartFile file) throws Exception;
 }

+ 24 - 28
doc-biz/src/main/java/com/doc/biz/service/impl/ElasticSearchServiceImpl.java

@@ -10,19 +10,14 @@ import com.doc.common.constant.Constants;
 import com.doc.common.utils.FileContentUtils;
 import com.doc.common.utils.StringUtils;
 import com.doc.common.utils.Tools;
-import com.doc.common.utils.file.FileUtils;
-import com.doc.common.utils.file.PdfUtils;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.core.env.Environment;
 import org.springframework.data.elasticsearch.NoSuchIndexException;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Service;
-import org.springframework.web.multipart.MultipartFile;
 
 import javax.annotation.Resource;
-import java.io.File;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.function.Function;
@@ -245,30 +240,31 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
      * @return 解析结果
      */
     private String getContent(String fileType, String fileId) throws Exception {
-        if (Constants.IMAGE_EXTENSION.contains(fileType)) {
-            try {
-                return ocrService.recognition(fileId);
-            } catch (Exception e) {
-                throw new Exception(e.getMessage());
-            }
-        }
-        String pdf = ".pdf";
-        if (pdf.equals(fileType)) {
-            DocumentVO vo = mongoService.downloadFile(fileId);
-            StringBuffer sb = new StringBuffer();
-            //PDF图片解析
-            List<File> list = PdfUtils.pdfToImage(vo.getData());
-            list.forEach(file -> {
-                MultipartFile f = FileUtils.getMultipartFile(file, file.getName());
-                try {
-                    sb.append(ocrService.recognition(f));
-                } catch (Exception e) {
-                    throw new RuntimeException(e);
-                }
-            });
-            return sb.toString();
+//        if (Constants.IMAGE_EXTENSION.contains(fileType)) {
+        try {
+            Map<String, Object> result = ocrService.recognition(fileId);
+            return (String) result.get("txt");
+        } catch (Exception e) {
+            throw new Exception(e.getMessage());
         }
-        return "";
+//        }
+//        String pdf = ".pdf";
+//        if (pdf.equals(fileType)) {
+//            DocumentVO vo = mongoService.downloadFile(fileId);
+//            StringBuffer sb = new StringBuffer();
+//            //PDF图片解析
+//            List<File> list = PdfUtils.pdfToImage(vo.getData());
+//            list.forEach(file -> {
+//                MultipartFile f = FileUtils.getMultipartFile(file, file.getName());
+//                try {
+//                    sb.append(ocrService.recognition(f));
+//                } catch (Exception e) {
+//                    throw new RuntimeException(e);
+//                }
+//            });
+//            return sb.toString();
+//        }
+//        return "";
     }
 
 

+ 12 - 7
doc-biz/src/main/java/com/doc/biz/service/impl/OcrServiceImpl.java

@@ -1,10 +1,12 @@
 package com.doc.biz.service.impl;
 
 import cn.hutool.http.HttpUtil;
+import com.alibaba.fastjson2.JSON;
 import com.doc.biz.service.IMongoService;
 import com.doc.biz.service.IOcrService;
 import com.doc.biz.vo.DocumentVO;
 import com.doc.system.service.ISysConfigService;
+import lombok.extern.slf4j.Slf4j;
 import org.springframework.context.ApplicationContext;
 import org.springframework.context.ConfigurableApplicationContext;
 import org.springframework.data.DataVerify;
@@ -17,6 +19,7 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.util.HashMap;
+import java.util.Map;
 
 /**
  * OCR 服务实现
@@ -25,6 +28,7 @@ import java.util.HashMap;
  * @date 2023-08-15
  */
 @Service
+@Slf4j
 public class OcrServiceImpl implements IOcrService {
     @Resource
     private ISysConfigService configService;
@@ -38,9 +42,9 @@ public class OcrServiceImpl implements IOcrService {
      * @return 识别结果
      */
     @Override
-    public String recognition(String fileId) throws Exception {
+    public Map<String, Object> recognition(String fileId) throws Exception {
         DocumentVO vo = mongoService.downloadFile(fileId);
-        File upFile = File.createTempFile(vo.getFileName(), vo.getSuffix() + ".");
+        File upFile = File.createTempFile(vo.getFileName() + System.currentTimeMillis(), "." + vo.getSuffix());
         Files.write(upFile.toPath(), vo.getData());
         return callPythonOcrApi(upFile);
     }
@@ -52,11 +56,11 @@ public class OcrServiceImpl implements IOcrService {
      * @return 识别结果
      */
     @Override
-    public String recognition(MultipartFile file) throws Exception {
+    public Map<String, Object> recognition(MultipartFile file) throws Exception {
         //借助 Java 创建的临时文件作为缓冲，通过 MultipartFile.transferTo() 方法将上传内容转换为 File。
         String originalFilename = file.getOriginalFilename();
         String[] filename = originalFilename.split("\\.");
-        File upFile = File.createTempFile(filename[0], filename[1] + ".");
+        File upFile = File.createTempFile(filename[0] + System.currentTimeMillis(), "." + filename[1]);
         file.transferTo(upFile);
         return callPythonOcrApi(upFile);
     }
@@ -67,16 +71,17 @@ public class OcrServiceImpl implements IOcrService {
      * @param file 文件
      * @return 识别结果
      */
-    private String callPythonOcrApi(File file) {
+    private Map<String, Object> callPythonOcrApi(File file) {
         HashMap<String, Object> map = new HashMap<>(3);
         map.put("file", file);
         String uri = configService.selectConfigByKey("api.ocr.uri");
-        String result = HttpUtil.post(uri, map);
+        String response = HttpUtil.post(uri, map);
         try {
             Files.delete(file.toPath());
         } catch (IOException ignored) {
         }
-        return result;
+        return JSON.parseObject(response);
+
     }
 
     @Resource