|
@@ -1,5 +1,6 @@
|
|
|
package com.doc.biz.service.impl;
|
|
|
|
|
|
+import com.doc.biz.domain.DocIndex;
|
|
|
import com.doc.biz.domain.DocInfo;
|
|
|
import com.doc.biz.domain.EsDocInfo;
|
|
|
import com.doc.biz.service.*;
|
|
@@ -43,6 +44,8 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
|
|
|
private IOcrService ocrService;
|
|
|
@Resource
|
|
|
private IDocInfoService docInfoService;
|
|
|
+ @Resource
|
|
|
+ private IDocIndexService indexService;
|
|
|
|
|
|
/**
|
|
|
* 文件内容入es库
|
|
@@ -52,12 +55,24 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
|
|
|
@Override
|
|
|
@Async("threadPoolTaskExecutor")
|
|
|
public void save(DocInfo info) {
|
|
|
- if (info.getFileSize() == 0) {
|
|
|
+ String fileType = info.getFileType().toLowerCase();
|
|
|
+
|
|
|
+ if (Constants.IMAGE_EXTENSION.contains(fileType)) {
|
|
|
+ DocIndex di = new DocIndex(info);
|
|
|
+ indexService.insertDocIndex(di);
|
|
|
return;
|
|
|
}
|
|
|
- String fileType = info.getFileType().toLowerCase();
|
|
|
+
|
|
|
Map<String, Function<byte[], String>> handlerMap = createHandlerMap(fileType);
|
|
|
String content = getContent(handlerMap, fileType, info.getFileId());
|
|
|
+
|
|
|
+ if (Constants.PDF_EXTENSION.contains(fileType) && StringUtils.isEmpty(content)) {
|
|
|
+ //如果是PDF未解析成功,则另外进行解析工作
|
|
|
+ DocIndex di = new DocIndex(info);
|
|
|
+ indexService.insertDocIndex(di);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
if (StringUtils.isNotEmpty(content)) {
|
|
|
comboIndex(info.getSpaceId());
|
|
|
try {
|
|
@@ -70,6 +85,37 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
+ * Runs an index task for one document: extracts its text and, when non-empty, saves it to ES.
|
|
|
+ * Images, and PDFs whose handler returns no text, go through the two-argument getContent overload; the index task for the document is deleted at the end.
|
|
|
+ * @param info document metadata; file type, file id, space id and doc id are read
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ @Async("threadPoolTaskExecutor")
|
|
|
+ public void taskSave(DocInfo info) {
|
|
|
+ String fileType = info.getFileType().toLowerCase();
|
|
|
+ String content = "";
|
|
|
+ if (Constants.IMAGE_EXTENSION.contains(fileType)) {
|
|
|
+ content = getContent(fileType, info.getFileId());
|
|
|
+ } else {
|
|
|
+ Map<String, Function<byte[], String>> handlerMap = createHandlerMap(fileType);
|
|
|
+ content = getContent(handlerMap, fileType, info.getFileId());
|
|
|
+ if (Constants.PDF_EXTENSION.contains(fileType) && StringUtils.isEmpty(content)) {
|
|
|
+ // The PDF handler produced no text, so fall back to the alternative parsing path
|
|
|
+ content = getContent(fileType, info.getFileId());
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ if (StringUtils.isNotEmpty(content)) {
|
|
|
+ comboIndex(info.getSpaceId());
|
|
|
+ EsDocInfo esDocInfo = new EsDocInfo(info.getDocId(), content);
|
|
|
+ esDocInfoService.save(esDocInfo);
|
|
|
+ }
|
|
|
+ // Processing finished: remove the index task for this document
|
|
|
+ indexService.deleteDocIndexByDocId(info.getDocId());
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
* 通过ID删除ES内容
|
|
|
*
|
|
|
* @param info 文档信息
|
|
@@ -138,7 +184,7 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
|
|
|
case ".doc":
|
|
|
case ".wps":
|
|
|
//word wps文档
|
|
|
- handlerMap.put(fileType, FileContentUtils::getContentDocx);
|
|
|
+ handlerMap.put(fileType, FileContentUtils::getContentDoc);
|
|
|
break;
|
|
|
case ".xls":
|
|
|
case ".et":
|
|
@@ -157,6 +203,9 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
|
|
|
case ".pdf":
|
|
|
handlerMap.put(fileType, FileContentUtils::getContentPdf);
|
|
|
break;
|
|
|
+ case ".txt":
|
|
|
+ handlerMap.put(fileType, FileContentUtils::getContentTxt);
|
|
|
+ break;
|
|
|
default:
|
|
|
break;
|
|
|
}
|
|
@@ -173,34 +222,53 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
|
|
|
*/
|
|
|
private String getContent(Map<String, Function<byte[], String>> handlerMap, String fileType, String fileId) {
|
|
|
Function<byte[], String> handler = handlerMap.get(fileType);
|
|
|
+ if (handler != null) {
|
|
|
+ DocumentVO vo = mongoService.downloadFile(fileId);
|
|
|
+ if (vo != null) {
|
|
|
+ return handler.apply(vo.getData());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Extracts text through OCR for content the byte handlers do not cover.
|
|
|
+ * Image types are recognised directly by file id; a ".pdf" is downloaded, converted to
|
|
|
+ * image files via PdfUtils.pdfToImage, and the text recognised from each image is concatenated.
|
|
|
+ *
|
|
|
+ * @param fileType file extension as passed by the callers (lower-cased, e.g. ".pdf")
|
|
|
+ * @param fileId id of the stored file
|
|
|
+ * @return recognised text, or "" when the type is not handled, the file is missing, or image recognition failed
|
|
|
+ */
|
|
|
+ private String getContent(String fileType, String fileId) {
|
|
|
if (Constants.IMAGE_EXTENSION.contains(fileType)) {
|
|
|
try {
|
|
|
return ocrService.recognition(fileId);
|
|
|
} catch (Exception e) {
|
|
|
log.error("解析图片错误:{}", e.getMessage());
|
|
|
}
|
|
|
- } else if (handler != null) {
|
|
|
+ }
|
|
|
+ String pdf = ".pdf";
|
|
|
+ if (pdf.equals(fileType)) {
|
|
|
DocumentVO vo = mongoService.downloadFile(fileId);
|
|
|
- if (vo != null) {
|
|
|
- String result = handler.apply(vo.getData());
|
|
|
- String pdf = ".pdf";
|
|
|
- if (pdf.equals(fileType) && StringUtils.isEmpty(result)) {
|
|
|
- StringBuffer sb = new StringBuffer();
|
|
|
- //如果是PDF未解析成功,则另外进行解析工作
|
|
|
- List<File> list = PdfUtils.pdfToImage(vo.getData());
|
|
|
- list.forEach(file -> {
|
|
|
- MultipartFile f = FileUtils.getMultipartFile(file, file.getName());
|
|
|
- try {
|
|
|
- sb.append(ocrService.recognition(f));
|
|
|
- } catch (Exception e) {
|
|
|
- log.error("解析图片错误:{}", e.getMessage());
|
|
|
- }
|
|
|
- });
|
|
|
- return sb.toString();
|
|
|
+ if (vo == null) {
|
|
|
+ // No stored file for this id: nothing to OCR (same null check as the handler-based overload)
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
+ // OCR every image file produced from the PDF; a failed image is logged and skipped
|
|
|
+ List<File> list = PdfUtils.pdfToImage(vo.getData());
|
|
|
+ list.forEach(file -> {
|
|
|
+ MultipartFile f = FileUtils.getMultipartFile(file, file.getName());
|
|
|
+ try {
|
|
|
+ sb.append(ocrService.recognition(f));
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("解析图片错误:{}", e.getMessage());
|
|
|
}
|
|
|
- }
|
|
|
+ });
|
|
|
+ return sb.toString();
|
|
|
}
|
|
|
return "";
|
|
|
}
|
|
|
-
|
|
|
}
|
|
|
+
|