소스 검색

新起定时任务执行图片和PDF的解析

wukai 1 년 전
부모
커밋
6c1890a213

+ 53 - 0
doc-admin/src/main/java/com/doc/task/DocIndexTask.java

@@ -0,0 +1,53 @@
+package com.doc.task;
+
+import com.doc.biz.domain.DocIndex;
+import com.doc.biz.domain.DocInfo;
+import com.doc.biz.service.IDocIndexService;
+import com.doc.biz.service.IDocInfoService;
+import com.doc.biz.service.IElasticSearchService;
+import com.doc.framework.web.domain.Server;
+import org.springframework.stereotype.Component;
+
+import javax.annotation.Resource;
+import java.util.List;
+
+/**
+ * Scheduled task that drains the deferred Elasticsearch indexing queue kept in
+ * {@code doc_index}.
+ *
+ * @author ruoyi
+ */
+@Component("docIndexTask")
+public class DocIndexTask {
+    /**
+     * Status of a queue entry that is still waiting to be processed.
+     */
+    private static final String STATUS_PENDING = "N";
+    /**
+     * Status written to an entry right before it is handed to the indexer.
+     */
+    private static final String STATUS_PROCESSING = "Y";
+    /**
+     * Upper bound for the CPU and memory readings; above it the run stops early.
+     */
+    private static final double LOAD_LIMIT = 80d;
+
+    @Resource
+    private IDocIndexService indexService;
+    @Resource
+    private IDocInfoService infoService;
+    @Resource
+    private IElasticSearchService elasticSearchService;
+
+    /**
+     * Hands every pending queue entry to {@link IElasticSearchService#taskSave}.
+     * <p>
+     * Before each entry the server load is sampled; when the CPU or memory reading
+     * exceeds the limit the run ends and the remaining entries stay pending for the
+     * next run. Entries whose document cannot be loaded are skipped and left pending,
+     * so that a {@code null} document is never passed to the indexer.
+     */
+    public void index() {
+        DocIndex search = new DocIndex();
+        search.setStatus(STATUS_PENDING);
+        List<DocIndex> pending = indexService.selectDocIndexList(search);
+        for (DocIndex index : pending) {
+            if (isOverloaded()) {
+                // CPU or memory is above the limit: stop and wait for the next run.
+                return;
+            }
+
+            DocInfo info = infoService.selectDocInfoByDocId(index.getDocId());
+            if (info == null) {
+                // taskSave dereferences the document immediately; passing null would fail
+                // there and leave this entry marked as processing, never to be retried.
+                continue;
+            }
+
+            // Mark the entry as being processed before handing it over.
+            index.setStatus(STATUS_PROCESSING);
+            indexService.updateDocIndex(index);
+
+            elasticSearchService.taskSave(info);
+        }
+    }
+
+    /**
+     * Samples the current server load.
+     *
+     * @return {@code true} when the CPU or memory reading is above {@link #LOAD_LIMIT};
+     *         {@code false} otherwise, including when the load cannot be read
+     */
+    private boolean isOverloaded() {
+        try {
+            Server server = new Server();
+            server.copyTo();
+            double cpuUsed = server.getCpu().getUsed();
+            // NOTE(review): this assumes getMem().getUsed() is a percentage like the CPU
+            // reading - confirm it is not an absolute amount (a usage/percent getter may exist).
+            double memUsed = server.getMem().getUsed();
+            return cpuUsed > LOAD_LIMIT || memUsed > LOAD_LIMIT;
+        } catch (Exception ignored) {
+            // Best effort: a failed load probe must not block indexing, so carry on.
+            return false;
+        }
+    }
+}

+ 114 - 0
doc-biz/src/main/java/com/doc/biz/controller/DocIndexController.java

@@ -0,0 +1,114 @@
+package com.doc.biz.controller;
+
+import java.util.List;
+import javax.annotation.Resource;
+import javax.servlet.http.HttpServletResponse;
+
+import io.swagger.annotations.Api;
+import io.swagger.annotations.ApiOperation;
+import org.springframework.security.access.prepost.PreAuthorize;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.PutMapping;
+import org.springframework.web.bind.annotation.DeleteMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+import com.doc.common.annotation.Log;
+import com.doc.common.core.controller.BaseController;
+import com.doc.common.core.domain.AjaxResult;
+import com.doc.common.enums.BusinessType;
+import com.doc.biz.domain.DocIndex;
+import com.doc.biz.service.IDocIndexService;
+import com.doc.common.utils.poi.ExcelUtil;
+import com.doc.common.core.page.TableDataInfo;
+
+/**
+ * 文件ES索引;存储解析错误和需要延迟解析的Controller
+ * 
+ * @author wukai
+ * @date 2023-12-14
+ */
+@Api(tags="文件ES索引;存储解析错误和需要延迟解析的")
+@RestController
+@RequestMapping("/biz/index")
+public class DocIndexController extends BaseController
+{
+    @Resource
+    private IDocIndexService docIndexService;
+
+    /**
+     * 查询文件ES索引;存储解析错误和需要延迟解析的列表
+     */
+    @ApiOperation("查询文件ES索引;存储解析错误和需要延迟解析的列表")
+    @PreAuthorize("@ss.hasPermi('biz:index:list')")
+    @GetMapping("/list")
+    public TableDataInfo list(DocIndex docIndex)
+    {
+        startPage();
+        List<DocIndex> list = docIndexService.selectDocIndexList(docIndex);
+        return getDataTable(list);
+    }
+
+    /**
+     * 导出文件ES索引;存储解析错误和需要延迟解析的列表
+     */
+    @ApiOperation("导出文件ES索引;存储解析错误和需要延迟解析的列表")
+    @PreAuthorize("@ss.hasPermi('biz:index:export')")
+    @Log(title = "文件ES索引;存储解析错误和需要延迟解析的", businessType = BusinessType.EXPORT)
+    @PostMapping("/export")
+    public void export(HttpServletResponse response, DocIndex docIndex)
+    {
+        List<DocIndex> list = docIndexService.selectDocIndexList(docIndex);
+        ExcelUtil<DocIndex> util = new ExcelUtil<DocIndex>(DocIndex.class);
+        util.exportExcel(response, list, "文件ES索引;存储解析错误和需要延迟解析的数据");
+    }
+
+    /**
+     * 获取文件ES索引;存储解析错误和需要延迟解析的详细信息
+     */
+    @ApiOperation("获取文件ES索引;存储解析错误和需要延迟解析的详细信息")
+    @PreAuthorize("@ss.hasPermi('biz:index:query')")
+    @GetMapping(value = "/{docId}")
+    public AjaxResult getInfo(@PathVariable("docId") Long docId)
+    {
+        return success(docIndexService.selectDocIndexByDocId(docId));
+    }
+
+    /**
+     * 新增文件ES索引;存储解析错误和需要延迟解析的
+     */
+    @ApiOperation("新增文件ES索引;存储解析错误和需要延迟解析的")
+    @PreAuthorize("@ss.hasPermi('biz:index:add')")
+    @Log(title = "文件ES索引;存储解析错误和需要延迟解析的", businessType = BusinessType.INSERT)
+    @PostMapping
+    public AjaxResult add(@RequestBody DocIndex docIndex)
+    {
+        return toAjax(docIndexService.insertDocIndex(docIndex));
+    }
+
+    /**
+     * 修改文件ES索引;存储解析错误和需要延迟解析的
+     */
+    @ApiOperation("修改文件ES索引;存储解析错误和需要延迟解析的")
+    @PreAuthorize("@ss.hasPermi('biz:index:edit')")
+    @Log(title = "文件ES索引;存储解析错误和需要延迟解析的", businessType = BusinessType.UPDATE)
+    @PutMapping
+    public AjaxResult edit(@RequestBody DocIndex docIndex)
+    {
+        return toAjax(docIndexService.updateDocIndex(docIndex));
+    }
+
+    /**
+     * 删除文件ES索引;存储解析错误和需要延迟解析的
+     */
+    @ApiOperation("删除文件ES索引;存储解析错误和需要延迟解析的")
+    @PreAuthorize("@ss.hasPermi('biz:index:remove')")
+    @Log(title = "文件ES索引;存储解析错误和需要延迟解析的", businessType = BusinessType.DELETE)
+	@DeleteMapping("/{docIds}")
+    public AjaxResult remove(@PathVariable Long[] docIds)
+    {
+        return toAjax(docIndexService.deleteDocIndexByDocIds(docIds));
+    }
+}

+ 4 - 2
doc-biz/src/main/java/com/doc/biz/controller/ElasticSearchController.java

@@ -15,6 +15,7 @@ import io.swagger.annotations.ApiImplicitParam;
 import io.swagger.annotations.ApiImplicitParams;
 import io.swagger.annotations.ApiOperation;
 import org.springframework.data.domain.PageRequest;
+import org.springframework.data.domain.Sort;
 import org.springframework.data.elasticsearch.NoSuchIndexException;
 import org.springframework.data.elasticsearch.core.SearchHit;
 import org.springframework.web.bind.annotation.GetMapping;
@@ -68,7 +69,8 @@ public class ElasticSearchController {
             String indexName = "docs_" + list.get(0).getSpaceId();
             esConfig.setIndexName(indexName);
             try {
-                List<SearchHit<EsDocInfo>> all = esDocInfoService.findByContent(keyword, PageRequest.of(page, size));
+
+                List<SearchHit<EsDocInfo>> all = esDocInfoService.findByContent(keyword, PageRequest.of(page, size, Sort.by(Sort.Order.desc("id"))));
                 List<SearchHit<EsDocInfo>> result = new ArrayList<>();
                 if (all.size() > 0) {
                     for (SearchHit<EsDocInfo> re : all) {
@@ -90,4 +92,4 @@ public class ElasticSearchController {
             return AjaxResult.success("无结果");
         }
     }
-}
+}

+ 41 - 0
doc-biz/src/main/java/com/doc/biz/domain/DocIndex.java

@@ -0,0 +1,41 @@
+package com.doc.biz.domain;
+
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.doc.common.core.domain.BaseEntity;
+import io.swagger.annotations.ApiModel;
+import io.swagger.annotations.ApiModelProperty;
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Entity for the {@code doc_index} table: one row per document whose content
+ * parsing failed or has been deferred.
+ *
+ * @author wukai
+ * @date 2023-12-14
+ */
+@ApiModel(value = "DocIndex", description = "文件ES索引;存储解析错误和需要延迟解析的")
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+public class DocIndex extends BaseEntity {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Document id; also the primary key of this record.
+     */
+    @ApiModelProperty("文件ID")
+    @TableId
+    private Long docId;
+
+    /**
+     * Processing status (not a file type). New records are created with "N".
+     */
+    @ApiModelProperty("正在解析")
+    private String status;
+    /**
+     * Creates a record for the given document with status "N".
+     *
+     * @param info document to queue; its id becomes this record's id
+     */
+    public DocIndex(DocInfo info) {
+        this.docId = info.getDocId();
+        this.status = "N";
+    }
+}

+ 62 - 0
doc-biz/src/main/java/com/doc/biz/mapper/DocIndexMapper.java

@@ -0,0 +1,62 @@
+package com.doc.biz.mapper;
+
+import java.util.List;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.doc.biz.domain.DocIndex;
+
+/**
+ * MyBatis mapper for {@link DocIndex} records (documents whose content parsing
+ * failed or has been deferred).
+ *
+ * @author wukai
+ * @date 2023-12-14
+ */
+public interface DocIndexMapper extends BaseMapper<DocIndex> {
+    /**
+     * Loads one record by its primary key.
+     *
+     * @param docId document id (primary key)
+     * @return the matching record
+     */
+    DocIndex selectDocIndexByDocId(Long docId);
+
+    /**
+     * Lists the records matching a filter object.
+     *
+     * @param docIndex filter values
+     * @return the matching records
+     */
+    List<DocIndex> selectDocIndexList(DocIndex docIndex);
+
+    /**
+     * Inserts a record.
+     *
+     * @param docIndex record to insert
+     * @return result of the insert statement
+     */
+    int insertDocIndex(DocIndex docIndex);
+
+    /**
+     * Updates a record.
+     *
+     * @param docIndex record carrying the key and the new values
+     * @return result of the update statement
+     */
+    int updateDocIndex(DocIndex docIndex);
+
+    /**
+     * Deletes one record by its primary key.
+     *
+     * @param docId document id (primary key)
+     * @return result of the delete statement
+     */
+    int deleteDocIndexByDocId(Long docId);
+
+    /**
+     * Deletes several records at once.
+     *
+     * @param docIds primary keys of the records to delete
+     * @return result of the delete statement
+     */
+    int deleteDocIndexByDocIds(Long[] docIds);
+}

+ 61 - 0
doc-biz/src/main/java/com/doc/biz/service/IDocIndexService.java

@@ -0,0 +1,61 @@
+package com.doc.biz.service;
+
+import java.util.List;
+import com.doc.biz.domain.DocIndex;
+
+/**
+ * Service contract for {@link DocIndex} records (documents whose content parsing
+ * failed or has been deferred).
+ *
+ * @author wukai
+ * @date 2023-12-14
+ */
+public interface IDocIndexService {
+    /**
+     * Loads one record by its primary key.
+     *
+     * @param docId document id (primary key)
+     * @return the matching record
+     */
+    DocIndex selectDocIndexByDocId(Long docId);
+
+    /**
+     * Lists the records matching a filter object.
+     *
+     * @param docIndex filter values
+     * @return the matching records
+     */
+    List<DocIndex> selectDocIndexList(DocIndex docIndex);
+
+    /**
+     * Creates a record.
+     *
+     * @param docIndex record to create
+     * @return result of the operation
+     */
+    int insertDocIndex(DocIndex docIndex);
+
+    /**
+     * Updates a record.
+     *
+     * @param docIndex record carrying the key and the new values
+     * @return result of the operation
+     */
+    int updateDocIndex(DocIndex docIndex);
+
+    /**
+     * Deletes several records at once.
+     *
+     * @param docIds primary keys of the records to delete
+     * @return result of the operation
+     */
+    int deleteDocIndexByDocIds(Long[] docIds);
+
+    /**
+     * Deletes one record by its primary key.
+     *
+     * @param docId document id (primary key)
+     * @return result of the operation
+     */
+    int deleteDocIndexByDocId(Long docId);
+}

+ 6 - 0
doc-biz/src/main/java/com/doc/biz/service/IElasticSearchService.java

@@ -16,6 +16,12 @@ public interface IElasticSearchService {
      * @param info 文档信息
      */
     void save(DocInfo info);
+    /**
+     * Stores the content of a queued document in Elasticsearch; this is the variant
+     * invoked for documents recorded in the deferred-parsing queue.
+     *
+     * @param info document metadata
+     */
+    void taskSave(DocInfo info);
 
     /**
      * 通过ID删除ES内容

+ 86 - 0
doc-biz/src/main/java/com/doc/biz/service/impl/DocIndexServiceImpl.java

@@ -0,0 +1,86 @@
+package com.doc.biz.service.impl;
+
+import java.util.List;
+import org.springframework.stereotype.Service;
+import com.doc.biz.mapper.DocIndexMapper;
+import com.doc.biz.domain.DocIndex;
+import com.doc.biz.service.IDocIndexService;
+import javax.annotation.Resource;
+
+/**
+ * Default {@link IDocIndexService}: every operation is forwarded to
+ * {@link DocIndexMapper}.
+ *
+ * @author wukai
+ * @date 2023-12-14
+ */
+@Service
+public class DocIndexServiceImpl implements IDocIndexService {
+    @Resource
+    private DocIndexMapper docIndexMapper;
+
+    /**
+     * Loads one record by its primary key.
+     *
+     * @param docId document id (primary key)
+     * @return the record returned by the mapper
+     */
+    @Override
+    public DocIndex selectDocIndexByDocId(Long docId) {
+        final DocIndex found = docIndexMapper.selectDocIndexByDocId(docId);
+        return found;
+    }
+
+    /**
+     * Lists the records matching a filter object.
+     *
+     * @param docIndex filter values
+     * @return the records returned by the mapper
+     */
+    @Override
+    public List<DocIndex> selectDocIndexList(DocIndex docIndex) {
+        final List<DocIndex> rows = docIndexMapper.selectDocIndexList(docIndex);
+        return rows;
+    }
+
+    /**
+     * Creates a record.
+     *
+     * @param docIndex record to create
+     * @return the mapper's result for the insert
+     */
+    @Override
+    public int insertDocIndex(DocIndex docIndex) {
+        final int affected = docIndexMapper.insertDocIndex(docIndex);
+        return affected;
+    }
+
+    /**
+     * Updates a record.
+     *
+     * @param docIndex record carrying the key and the new values
+     * @return the mapper's result for the update
+     */
+    @Override
+    public int updateDocIndex(DocIndex docIndex) {
+        final int affected = docIndexMapper.updateDocIndex(docIndex);
+        return affected;
+    }
+
+    /**
+     * Deletes several records at once.
+     *
+     * @param docIds primary keys of the records to delete
+     * @return the mapper's result for the delete
+     */
+    @Override
+    public int deleteDocIndexByDocIds(Long[] docIds) {
+        final int affected = docIndexMapper.deleteDocIndexByDocIds(docIds);
+        return affected;
+    }
+
+    /**
+     * Deletes one record by its primary key.
+     *
+     * @param docId document id (primary key)
+     * @return the mapper's result for the delete
+     */
+    @Override
+    public int deleteDocIndexByDocId(Long docId) {
+        final int affected = docIndexMapper.deleteDocIndexByDocId(docId);
+        return affected;
+    }
+}

+ 9 - 4
doc-biz/src/main/java/com/doc/biz/service/impl/DocInfoServiceImpl.java

@@ -7,6 +7,7 @@ import com.doc.biz.domain.EsDocInfo;
 import com.doc.biz.mapper.DocInfoMapper;
 import com.doc.biz.service.*;
 import com.doc.biz.vo.DocumentVO;
+import com.doc.common.constant.Constants;
 import com.doc.common.utils.DateUtils;
 import com.doc.common.utils.SecurityUtils;
 import com.doc.common.utils.StringUtils;
@@ -148,10 +149,14 @@ public class DocInfoServiceImpl implements IDocInfoService {
         checkDuplicateNames(docInfo);
 
         int i = docInfoMapper.insertDocInfo(docInfo);
-        String allowSearch = "l0300";
-        if (dirService.hasRole(docInfo.getDirId(), allowSearch)) {
-            //只有允许全文搜索的才会存入ES库,否则不存入
-            elasticSearchService.save(docInfo);
+        String fileType = docInfo.getFileType().toLowerCase();
+        if (docInfo.getFileSize() > 0 && Constants.ES_EXTENSION.contains(fileType)) {
+            //文件不为空,且可以解析的文件后缀名
+            String allowSearch = "l0300";
+            if (dirService.hasRole(docInfo.getDirId(), allowSearch)) {
+                //只有允许全文搜索的才会存入ES库,否则不存入
+                elasticSearchService.save(docInfo);
+            }
         }
         //改变空间容量
         spaceService.updateUsedCap(docInfo.getSpaceId());

+ 84 - 22
doc-biz/src/main/java/com/doc/biz/service/impl/ElasticSearchServiceImpl.java

@@ -1,5 +1,6 @@
 package com.doc.biz.service.impl;
 
+import com.doc.biz.domain.DocIndex;
 import com.doc.biz.domain.DocInfo;
 import com.doc.biz.domain.EsDocInfo;
 import com.doc.biz.service.*;
@@ -43,6 +44,8 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
     private IOcrService ocrService;
     @Resource
     private IDocInfoService docInfoService;
+    @Resource
+    private IDocIndexService indexService;
 
     /**
      * 文件内容入es库
@@ -52,12 +55,24 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
     @Override
     @Async("threadPoolTaskExecutor")
     public void save(DocInfo info) {
-        if (info.getFileSize() == 0) {
+        String fileType = info.getFileType().toLowerCase();
+
+        if (Constants.IMAGE_EXTENSION.contains(fileType)) {
+            DocIndex di = new DocIndex(info);
+            indexService.insertDocIndex(di);
             return;
         }
-        String fileType = info.getFileType().toLowerCase();
+
         Map<String, Function<byte[], String>> handlerMap = createHandlerMap(fileType);
         String content = getContent(handlerMap, fileType, info.getFileId());
+
+        if (Constants.PDF_EXTENSION.contains(fileType) && StringUtils.isEmpty(content)) {
+            //如果是PDF未解析成功,则另外进行解析工作
+            DocIndex di = new DocIndex(info);
+            indexService.insertDocIndex(di);
+            return;
+        }
+
         if (StringUtils.isNotEmpty(content)) {
             comboIndex(info.getSpaceId());
             try {
@@ -70,6 +85,37 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
     }
 
     /**
+     * Processes one queued document: extracts its text and stores it in Elasticsearch.
+     * <p>
+     * Image types go straight to the OCR-based extraction. Other types use their
+     * regular handler first; a PDF whose handler returns no text is retried through
+     * the OCR-based extraction. The queue entry for the document is deleted at the
+     * end, whether or not any content was found.
+     *
+     * @param info document metadata; a {@code null} value is logged and ignored
+     */
+    @Override
+    @Async("threadPoolTaskExecutor")
+    public void taskSave(DocInfo info) {
+        if (info == null) {
+            // Callers may pass a lookup result straight in; without this guard the
+            // getFileType() call below throws a NullPointerException.
+            log.warn("taskSave skipped: document info is null");
+            return;
+        }
+        String fileType = info.getFileType().toLowerCase();
+        String content;
+        if (Constants.IMAGE_EXTENSION.contains(fileType)) {
+            content = getContent(fileType, info.getFileId());
+        } else {
+            Map<String, Function<byte[], String>> handlerMap = createHandlerMap(fileType);
+            content = getContent(handlerMap, fileType, info.getFileId());
+            if (Constants.PDF_EXTENSION.contains(fileType) && StringUtils.isEmpty(content)) {
+                // The PDF handler returned nothing: fall back to the OCR-based extraction.
+                content = getContent(fileType, info.getFileId());
+            }
+        }
+
+        if (StringUtils.isNotEmpty(content)) {
+            comboIndex(info.getSpaceId());
+            EsDocInfo esDocInfo = new EsDocInfo(info.getDocId(), content);
+            esDocInfoService.save(esDocInfo);
+        }
+        // Processing is done: remove the queue entry.
+        indexService.deleteDocIndexByDocId(info.getDocId());
+    }
+
+    /**
      * 通过ID删除ES内容
      *
      * @param info 文档信息
@@ -138,7 +184,7 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
             case ".doc":
             case ".wps":
                 //word wps文档
-                handlerMap.put(fileType, FileContentUtils::getContentDocx);
+                handlerMap.put(fileType, FileContentUtils::getContentDoc);
                 break;
             case ".xls":
             case ".et":
@@ -157,6 +203,9 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
             case ".pdf":
                 handlerMap.put(fileType, FileContentUtils::getContentPdf);
                 break;
+            case ".txt":
+                handlerMap.put(fileType, FileContentUtils::getContentTxt);
+                break;
             default:
                 break;
         }
@@ -173,34 +222,47 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
      */
     private String getContent(Map<String, Function<byte[], String>> handlerMap, String fileType, String fileId) {
         Function<byte[], String> handler = handlerMap.get(fileType);
+        if (handler == null) {
+            // No handler registered for this file type.
+            return "";
+        }
+        DocumentVO vo = mongoService.downloadFile(fileId);
+        if (vo == null) {
+            // Nothing was downloaded for this file id.
+            return "";
+        }
+        return handler.apply(vo.getData());
+    }
+
+    /**
+     * Extracts text through OCR.
+     * <p>
+     * Image types are passed to the OCR service by file id; if that fails the error
+     * is logged and an empty string is returned. A PDF is downloaded, converted to
+     * image files, and the OCR result of each file is appended in order; a file whose
+     * recognition fails is logged and skipped.
+     *
+     * @param fileType lower-case file extension including the leading dot
+     * @param fileId   id of the stored file
+     * @return the recognised text, or an empty string when nothing could be extracted
+     *         (unsupported type, failed image OCR, or a PDF that could not be downloaded)
+     */
+    private String getContent(String fileType, String fileId) {
         if (Constants.IMAGE_EXTENSION.contains(fileType)) {
             try {
                 return ocrService.recognition(fileId);
             } catch (Exception e) {
                 log.error("解析图片错误:{}", e.getMessage());
             }
-        } else if (handler != null) {
+        }
+        String pdf = ".pdf";
+        if (pdf.equals(fileType)) {
             DocumentVO vo = mongoService.downloadFile(fileId);
-            if (vo != null) {
-                String result = handler.apply(vo.getData());
-                String pdf = ".pdf";
-                if (pdf.equals(fileType) && StringUtils.isEmpty(result)) {
-                    StringBuffer sb = new StringBuffer();
-                    //如果是PDF未解析成功,则另外进行解析工作
-                    List<File> list = PdfUtils.pdfToImage(vo.getData());
-                    list.forEach(file -> {
-                        MultipartFile f = FileUtils.getMultipartFile(file, file.getName());
-                        try {
-                            sb.append(ocrService.recognition(f));
-                        } catch (Exception e) {
-                            log.error("解析图片错误:{}", e.getMessage());
-                        }
-                    });
-                    return sb.toString();
+            if (vo == null) {
+                // The download returned nothing; without this guard vo.getData() below
+                // throws a NullPointerException.
+                return "";
+            }
+            StringBuilder sb = new StringBuilder();
+            // Convert the PDF to image files and OCR each of them.
+            List<File> list = PdfUtils.pdfToImage(vo.getData());
+            list.forEach(file -> {
+                MultipartFile f = FileUtils.getMultipartFile(file, file.getName());
+                try {
+                    sb.append(ocrService.recognition(f));
+                } catch (Exception e) {
+                    log.error("解析图片错误:{}", e.getMessage());
                 }
-            }
+            });
+            return sb.toString();
         }
         return "";
     }
-
 }
+

+ 63 - 0
doc-biz/src/main/resources/mapper/biz/DocIndexMapper.xml

@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE mapper
+PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
+"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.doc.biz.mapper.DocIndexMapper">
+    <!-- Maps the DOC_ID, STATUS and REMARK columns onto the DocIndex properties. -->
+    <resultMap type="DocIndex" id="DocIndexResult">
+        <result property="docId"    column="DOC_ID"    />
+        <result property="status"    column="STATUS"    />
+        <result property="remark"    column="REMARK"    />
+    </resultMap>
+    <!-- Shared select clause reused by both select statements. -->
+    <sql id="selectDocIndexVo">
+        select DOC_ID, STATUS, REMARK from doc_index
+    </sql>
+    <!-- Lists rows; STATUS and REMARK are matched exactly, and only when non-empty. -->
+    <select id="selectDocIndexList" parameterType="DocIndex" resultMap="DocIndexResult">
+        <include refid="selectDocIndexVo"/>
+        <where>
+            <if test="status != null  and status != ''"> and STATUS = #{status}</if>
+            <if test="remark != null  and remark != ''"> and REMARK = #{remark}</if>
+        </where>
+    </select>
+    <!-- Loads the row with the given DOC_ID. -->
+    <select id="selectDocIndexByDocId" parameterType="Long" resultMap="DocIndexResult">
+        <include refid="selectDocIndexVo"/>
+        where DOC_ID = #{docId}
+    </select>
+    <!-- Inserts a row; only columns whose property is non-null are written. -->
+    <insert id="insertDocIndex" parameterType="DocIndex">
+        insert into doc_index
+        <trim prefix="(" suffix=")" suffixOverrides=",">
+            <if test="docId != null">DOC_ID,</if>
+            <if test="status != null">STATUS,</if>
+            <if test="remark != null">REMARK,</if>
+         </trim>
+        <trim prefix="values (" suffix=")" suffixOverrides=",">
+            <if test="docId != null">#{docId},</if>
+            <if test="status != null">#{status},</if>
+            <if test="remark != null">#{remark},</if>
+         </trim>
+    </insert>
+    <!-- Updates one row by DOC_ID; null properties are left out of the SET clause. -->
+    <update id="updateDocIndex" parameterType="DocIndex">
+        update doc_index
+        <trim prefix="SET" suffixOverrides=",">
+            <if test="status != null">STATUS = #{status},</if>
+            <if test="remark != null">REMARK = #{remark},</if>
+        </trim>
+        where DOC_ID = #{docId}
+    </update>
+    <!-- Deletes the row with the given DOC_ID. -->
+    <delete id="deleteDocIndexByDocId" parameterType="Long">
+        delete from doc_index where DOC_ID = #{docId}
+    </delete>
+    <!-- Deletes every row whose DOC_ID is contained in the given array. -->
+    <delete id="deleteDocIndexByDocIds" parameterType="String">
+        delete from doc_index where DOC_ID in
+        <foreach item="docId" collection="array" open="(" separator="," close=")">
+            #{docId}
+        </foreach>
+    </delete>
+</mapper>

+ 29 - 0
doc-common/src/main/java/com/doc/common/constant/Constants.java

@@ -149,6 +149,35 @@ public class Constants {
      */
     public static final String IMAGE_EXTENSION = ".bmp.gif.jpg.jpeg.png";
     /**
+     * Every extension whose content is stored in Elasticsearch: the Word, Excel, PPT,
+     * PDF, TXT and image extension lists concatenated into one string.
+     */
+    public static final String ES_EXTENSION = Constants.WORD_EXTENSION + Constants.EXCEL_EXTENSION
+            + Constants.PPT_EXTENSION + Constants.PDF_EXTENSION + Constants.TXT_EXTENSION + Constants.IMAGE_EXTENSION;
+    /**
+     * Extensions that need OCR parsing: the image extensions followed by PDF.
+     */
+    public static final String OCR_EXTENSION = Constants.IMAGE_EXTENSION + Constants.PDF_EXTENSION;
+    /**
+     * Word documents.
+     */
+    public static final String WORD_EXTENSION = ".docx.doc.wps";
+    /**
+     * Excel documents.
+     */
+    public static final String EXCEL_EXTENSION = ".xls.et.xlsx";
+    /**
+     * PPT documents.
+     */
+    public static final String PPT_EXTENSION = ".ppt.pptx.dps";
+    /**
+     * PDF documents.
+     */
+    public static final String PDF_EXTENSION = ".pdf";
+    /**
+     * TXT documents.
+     */
+    public static final String TXT_EXTENSION = ".txt";
+    /**
      * 新增一个常量LOGIN_USERID_KEY公用
      */
     public static final String LOGIN_USERID_KEY = "login_userid:";