Browse Source

集成elasticsearch

wukai 1 year ago
parent
commit
b624ede370

+ 4 - 0
doc-admin/src/main/resources/application-dev.yml

@@ -8,6 +8,10 @@ ruoyi:
   profile: D:/ruoyi/uploadPath
 # Spring configuration
 spring:
+  # Elasticsearch connection settings
+  elasticsearch:
+    rest:
+      uris: http://192.168.188.188:19200
   # MongoDB configuration
   data:
     mongodb:

+ 18 - 0
doc-biz/pom.xml

@@ -34,6 +34,24 @@
             <groupId>org.springframework.boot</groupId>
             <artifactId>spring-boot-starter-websocket</artifactId>
         </dependency>
+        <!-- Elasticsearch integration: start.
+             NOTE(review): org.elasticsearch:elasticsearch is pinned to 8.6.2 below while the
+             starter's own version is left to dependency management; confirm the two are compatible. -->
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
+            <!--                        <version>3.0.4</version>-->
+            <!--            <exclusions>-->
+            <!--                <exclusion>-->
+            <!--                    <groupId>org.elasticsearch</groupId>-->
+            <!--                    <artifactId>elasticsearch</artifactId>-->
+            <!--                </exclusion>-->
+            <!--            </exclusions>-->
+        </dependency>
+        <dependency>
+            <groupId>org.elasticsearch</groupId>
+            <artifactId>elasticsearch</artifactId>
+            <version>8.6.2</version>
+        </dependency>
+        <!-- Elasticsearch integration: end -->
 
     </dependencies>
 

+ 71 - 11
doc-biz/src/main/java/com/doc/biz/controller/DocInfoController.java

@@ -1,20 +1,27 @@
 package com.doc.biz.controller;
 
 import com.doc.biz.domain.DocInfo;
+import com.doc.biz.domain.DocSpace;
+import com.doc.biz.domain.EsDocInfo;
 import com.doc.biz.service.IDocInfoService;
+import com.doc.biz.service.IEsDocInfoService;
 import com.doc.biz.service.IMongoService;
 import com.doc.biz.vo.DocumentVO;
 import com.doc.common.annotation.Log;
+import com.doc.common.config.EsConfig;
 import com.doc.common.core.controller.BaseController;
 import com.doc.common.core.domain.AjaxResult;
 import com.doc.common.core.page.TableDataInfo;
 import com.doc.common.enums.BusinessType;
+import com.doc.common.enums.SpaceType;
+import com.doc.common.utils.FileContentUtils;
 import com.doc.common.utils.SecurityUtils;
 import com.doc.common.utils.poi.ExcelUtil;
 import io.swagger.annotations.Api;
 import io.swagger.annotations.ApiOperation;
 import io.swagger.annotations.ApiParam;
 import lombok.extern.slf4j.Slf4j;
+import org.springframework.data.elasticsearch.NoSuchIndexException;
 import org.springframework.http.HttpHeaders;
 import org.springframework.http.HttpStatus;
 import org.springframework.http.ResponseEntity;
@@ -24,8 +31,12 @@ import org.yaml.snakeyaml.util.UriEncoder;
 
 import javax.annotation.Resource;
 import javax.servlet.http.HttpServletResponse;
+import java.io.File;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
+import java.util.function.Function;
 
 /**
  * 文件基本信息表Controller
@@ -42,6 +53,10 @@ public class DocInfoController extends BaseController {
     private IDocInfoService docInfoService;
     @Resource
     private IMongoService mongoService;
+    @Resource
+    private IEsDocInfoService esDocInfoService;
+    @Resource
+    private EsConfig esConfig;
 
     /**
      * 文件上传
@@ -51,9 +66,9 @@ public class DocInfoController extends BaseController {
      */
     @ApiOperation("上传文件")
     @PostMapping("/upload")
-    public AjaxResult uploadFile(@ApiParam(value = "文件" , required = true) @RequestPart(value = "file") MultipartFile file,
-                                 @ApiParam(value = "空间ID" , required = true) @RequestParam Long spaceId,
-                                 @ApiParam(value = "目录ID" , required = true) @RequestParam Long dirId) {
+    public AjaxResult uploadFile(@ApiParam(value = "文件", required = true) @RequestPart(value = "file") MultipartFile file,
+                                 @ApiParam(value = "空间ID", required = true) @RequestParam Long spaceId,
+                                 @ApiParam(value = "目录ID", required = true) @RequestParam Long dirId) {
         try {
             DocumentVO vo = mongoService.uploadFile(file);
             DocInfo docInfo = new DocInfo();
@@ -64,9 +79,14 @@ public class DocInfoController extends BaseController {
             docInfo.setFileSize(vo.getFileSize());
             docInfo.setFileType(vo.getSuffix());
             docInfo.setCreateBy(SecurityUtils.getUsername());
-            return toAjax(docInfoService.insertDocInfo(docInfo));
+            docInfoService.insertDocInfo(docInfo);
+
+            DocumentVO documentVO = mongoService.downloadFile(vo.getFileId());
+            insertEs(documentVO.getData(), docInfo.getFileType(), docInfo.getDocId(), spaceId);
+
+            return success();
         } catch (Exception e) {
-            log.error("文件上传失败:" , e);
+            log.error("文件上传失败:", e);
             return error(e.getMessage());
         }
     }
@@ -100,7 +120,7 @@ public class DocInfoController extends BaseController {
             return ResponseEntity.ok()
                     .header(HttpHeaders.CONTENT_DISPOSITION, "attachment;filename=\"" + UriEncoder.encode(mongoFileVo.getFileName()) + "\"")
                     .header(HttpHeaders.CONTENT_TYPE, mongoFileVo.getContentType())
-                    .header(HttpHeaders.CONTENT_LENGTH, mongoFileVo.getFileSize() + "").header("Connection" , "close")
+                    .header(HttpHeaders.CONTENT_LENGTH, mongoFileVo.getFileSize() + "").header("Connection", "close")
                     .body(mongoFileVo.getData());
         } else {
             return ResponseEntity.status(HttpStatus.NOT_FOUND).body("file does not exist");
@@ -121,7 +141,7 @@ public class DocInfoController extends BaseController {
             return ResponseEntity.ok()
                     .header(HttpHeaders.CONTENT_DISPOSITION, "filename=\"" + UriEncoder.encode(mongoFileVo.getFileName()) + "\"")
                     .header(HttpHeaders.CONTENT_TYPE, mongoFileVo.getContentType())
-                    .header(HttpHeaders.CONTENT_LENGTH, mongoFileVo.getFileSize() + "").header("Connection" , "close")
+                    .header(HttpHeaders.CONTENT_LENGTH, mongoFileVo.getFileSize() + "").header("Connection", "close")
                     .body(mongoFileVo.getData());
         } else {
             return ResponseEntity.status(HttpStatus.NOT_FOUND).body("file does not exist");
@@ -145,7 +165,7 @@ public class DocInfoController extends BaseController {
      */
     @ApiOperation("导出文件基本信息表列表")
     //@PreAuthorize("@ss.hasPermi('biz:info:export')")
-    @Log(title = "文件基本信息表" , businessType = BusinessType.EXPORT)
+    @Log(title = "文件基本信息表", businessType = BusinessType.EXPORT)
     @PostMapping("/export")
     public void export(HttpServletResponse response, DocInfo docInfo) {
         List<DocInfo> list = docInfoService.selectDocInfoList(docInfo);
@@ -168,7 +188,7 @@ public class DocInfoController extends BaseController {
      */
     @ApiOperation("新增文件基本信息表")
     //@PreAuthorize("@ss.hasPermi('biz:info:add')")
-    @Log(title = "文件基本信息表" , businessType = BusinessType.INSERT)
+    @Log(title = "文件基本信息表", businessType = BusinessType.INSERT)
     @PostMapping
     public AjaxResult add(@RequestBody DocInfo docInfo) {
         return toAjax(docInfoService.insertDocInfo(docInfo));
@@ -179,9 +199,10 @@ public class DocInfoController extends BaseController {
      */
     @ApiOperation("修改文件基本信息表")
     //@PreAuthorize("@ss.hasPermi('biz:info:edit')")
-    @Log(title = "文件基本信息表" , businessType = BusinessType.UPDATE)
+    @Log(title = "文件基本信息表", businessType = BusinessType.UPDATE)
     @PutMapping
     public AjaxResult edit(@RequestBody DocInfo docInfo) {
+        //TODO update the Elasticsearch document when the file content is modified
         docInfo.setUpdateBy(SecurityUtils.getUsername());
         return toAjax(docInfoService.updateDocInfo(docInfo));
     }
@@ -191,13 +212,52 @@ public class DocInfoController extends BaseController {
      */
     @ApiOperation("删除文件基本信息表")
     //@PreAuthorize("@ss.hasPermi('biz:info:remove')")
-    @Log(title = "文件基本信息表" , businessType = BusinessType.DELETE)
+    @Log(title = "文件基本信息表", businessType = BusinessType.DELETE)
     @DeleteMapping("/{docIds}")
     public AjaxResult remove(@PathVariable Long[] docIds) {
         for (Long docId : docIds) {
             DocInfo info = docInfoService.selectDocInfoByDocId(docId);
+            if (info == null) {
+                // Unknown id: there is no MongoDB file or ES document to clean up for it.
+                log.warn("Skipping file/index cleanup for missing document, docId={}", docId);
+                continue;
+            }
+            // Remove the stored file from MongoDB.
             mongoService.removeFile(info.getFileId());
+
+            try {
+                // Remove the document from the ES index of its space.
+                String indexName = "docs_" + info.getSpaceId();
+                esConfig.setIndexName(indexName);
+                esDocInfoService.deleteById(docId);
+            } catch (NoSuchIndexException ignored) {
+                // Deliberately ignored: the index does not exist, so there is nothing to delete.
+                log.debug("No ES index for spaceId={}, nothing to delete for docId={}", info.getSpaceId(), docId);
+            }
         }
         return toAjax(docInfoService.deleteDocInfoByDocIds(docIds));
     }
+
+
+    /**
+     * Extracts the text of a file and stores it in Elasticsearch.
+     * Files whose extension has no registered extractor are skipped without error.
+     *
+     * @param data    raw file bytes
+     * @param ext     file extension including the leading dot, e.g. {@code .docx}; matched case-insensitively
+     * @param docId   document ID, used as the ES document id
+     * @param spaceId space ID; the target index is {@code docs_<spaceId>}
+     */
+    private void insertEs(byte[] data, String ext, Long docId, Long spaceId) {
+        if (ext == null) {
+            // No extension means no extractor can be selected.
+            return;
+        }
+
+        Map<String, Function<byte[], String>> handlerMap = new HashMap<>(16);
+        handlerMap.put(".docx", FileContentUtils::getContentDocx);
+        handlerMap.put(".doc", FileContentUtils::getContentDoc);
+        handlerMap.put(".wps", FileContentUtils::getContentWps);
+        handlerMap.put(".txt", FileContentUtils::getContentTxt);
+
+        // Normalise the case so that ".DOCX" and ".docx" pick the same extractor.
+        Function<byte[], String> handler = handlerMap.get(ext.toLowerCase(java.util.Locale.ROOT));
+        if (handler == null) {
+            return;
+        }
+
+        String content = handler.apply(data);
+        // Build the ES index name for the space.
+        String indexName = "docs_" + spaceId;
+        // Log identifiers only; the extracted text may be large and must not be dumped to the console.
+        log.debug("Indexing docId={} into ES index {}", docId, indexName);
+        esConfig.setIndexName(indexName);
+        esDocInfoService.save(new EsDocInfo(docId, content));
+    }
 }

+ 85 - 0
doc-biz/src/main/java/com/doc/biz/controller/ElasticSearchController.java

@@ -0,0 +1,85 @@
+package com.doc.biz.controller;
+
+import com.alibaba.fastjson2.JSON;
+import com.doc.biz.domain.DocInfo;
+import com.doc.biz.domain.DocSpace;
+import com.doc.biz.domain.EsDocInfo;
+import com.doc.biz.service.IDocInfoService;
+import com.doc.biz.service.IDocSpaceService;
+import com.doc.biz.service.IEsDocInfoService;
+import com.doc.common.config.EsConfig;
+import com.doc.common.enums.SpaceType;
+import com.doc.common.utils.SecurityUtils;
+import org.springframework.data.domain.PageRequest;
+import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
+import org.springframework.data.elasticsearch.core.SearchHit;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+
+import javax.annotation.Resource;
+import javax.print.Doc;
+import java.util.List;
+
+/**
+ * REST controller for full-text search over documents stored in Elasticsearch.
+ *
+ * @author wukai
+ */
+@lombok.extern.slf4j.Slf4j
+@RestController
+@RequestMapping("es")
+public class ElasticSearchController {
+
+    @Resource
+    private ElasticsearchRestTemplate elasticsearchRestTemplate;
+    @Resource
+    private IEsDocInfoService esDocInfoService;
+    @Resource
+    private IDocInfoService infoService;
+    @Resource
+    private EsConfig esConfig;
+    @Resource
+    private IDocSpaceService docSpaceService;
+
+
+    /**
+     * Searches the {@code content} field (tokenized with the IK analyzer) of the documents in the
+     * caller's space of the given type, and attaches the matching {@link DocInfo} to every hit.
+     *
+     * <p>The space is looked up by type; for the department and personal types the lookup is
+     * further restricted to the current user's department or user id. The first space found
+     * determines the index ({@code docs_<spaceId>}).
+     *
+     * @param type    space type value, compared against {@link SpaceType}
+     * @param content text to search for
+     * @param page    page index, passed unchanged to {@link PageRequest#of(int, int)}
+     * @param size    page size, passed unchanged to {@link PageRequest#of(int, int)}
+     * @return the search hits serialized as JSON, or {@code null} when no space matches or the search fails
+     */
+    @GetMapping("/query")
+    public String query(@RequestParam(value = "type", required = true) String type,
+                        @RequestParam(value = "content", required = true) String content,
+                        @RequestParam(value = "page") int page,
+                        @RequestParam(value = "size") int size) {
+        DocSpace space = new DocSpace();
+        space.setSpaceType(type);
+
+        if (type.equals(SpaceType.DEPT.getValue())) {
+            space.setOwner(SecurityUtils.getDeptId());
+        } else if (type.equals(SpaceType.PERSONAL.getValue())) {
+            space.setOwner(SecurityUtils.getUserId());
+        }
+
+        List<DocSpace> spaces = docSpaceService.selectDocSpaceList(space);
+        if (spaces == null || spaces.isEmpty()) {
+            return null;
+        }
+
+        String indexName = "docs_" + spaces.get(0).getSpaceId();
+        esConfig.setIndexName(indexName);
+        try {
+            List<SearchHit<EsDocInfo>> hits = esDocInfoService.findByContent(content, PageRequest.of(page, size));
+            for (SearchHit<EsDocInfo> hit : hits) {
+                EsDocInfo esDocInfo = hit.getContent();
+                // Enrich the hit with the database record of the document.
+                esDocInfo.setDocInfo(infoService.selectDocInfoByDocId(esDocInfo.getId()));
+            }
+            return JSON.toJSONString(hits);
+        } catch (Exception e) {
+            // Callers receive an empty response on failure; keep the cause in the log.
+            log.error("Elasticsearch query failed, index={}", indexName, e);
+            return null;
+        }
+    }
+
+}

+ 34 - 0
doc-biz/src/main/java/com/doc/biz/domain/EsDocInfo.java

@@ -0,0 +1,34 @@
+package com.doc.biz.domain;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.experimental.Accessors;
+import org.springframework.data.annotation.Id;
+import org.springframework.data.elasticsearch.annotations.Document;
+import org.springframework.data.elasticsearch.annotations.Field;
+import org.springframework.data.elasticsearch.annotations.FieldType;
+
+/**
+ * Elasticsearch document holding the extracted text of a file.
+ *
+ * <p>The index name is not fixed: it is resolved through the SpEL expression on
+ * {@code @Document}, which calls {@code getIndexName()} on the {@code esConfig} bean.
+ * {@code createIndex = false} is set on the mapping annotation.
+ *
+ * <p>Fields: {@code id} is the document id; {@code content} is a text field analyzed with
+ * {@code ik_max_word}; {@code docInfo} is left unset by the two-argument constructor.
+ *
+ * @author wukai
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+@Accessors(chain = true)
+@Document(indexName = "#{@esConfig.getIndexName()}", createIndex = false)
+public class EsDocInfo {
+
+    @Id
+    private Long id;
+
+    @Field(type = FieldType.Text, analyzer = "ik_max_word")
+    private String content;
+
+    private DocInfo docInfo;
+
+    public EsDocInfo(Long id, String content) {
+        this.id = id;
+        this.content = content;
+    }
+}

+ 41 - 0
doc-biz/src/main/java/com/doc/biz/service/IEsDocInfoService.java

@@ -0,0 +1,41 @@
+package com.doc.biz.service;
+
+import com.doc.biz.domain.EsDocInfo;
+import org.springframework.data.domain.Pageable;
+import org.springframework.data.elasticsearch.annotations.Highlight;
+import org.springframework.data.elasticsearch.annotations.HighlightField;
+import org.springframework.data.elasticsearch.core.SearchHit;
+import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
+
+import java.io.InputStream;
+import java.util.List;
+
+
+/**
+ * Elasticsearch repository for {@link EsDocInfo} documents identified by a {@code Long} id.
+ *
+ * @author wukai
+ */
+public interface IEsDocInfoService extends ElasticsearchRepository<EsDocInfo, Long> {
+
+    /**
+     * Searches the {@code content} field with paging and requests highlighting of
+     * {@code content} in the hits. No explicit query is declared here, so the query is
+     * presumably derived from the method name (verify against Spring Data behavior).
+     *
+     * @param content  text to search for
+     * @param pageable paging parameters
+     * @return the matching search hits
+     */
+    @Highlight(fields = {
+            @HighlightField(name = "content")
+    })
+    List<SearchHit<EsDocInfo>> findByContent(String content, Pageable pageable);
+
+    /**
+     * Searches the {@code content} field without paging parameters and requests
+     * highlighting of {@code content} in the hits.
+     *
+     * @param content text to search for
+     * @return the matching search hits
+     */
+    @Highlight(fields = {
+            @HighlightField(name = "content")
+    })
+    List<SearchHit<EsDocInfo>> findByContent(String content);
+}

+ 0 - 1
doc-biz/src/main/java/com/doc/biz/service/impl/MongoServiceImpl.java

@@ -97,7 +97,6 @@ public class MongoServiceImpl implements IMongoService {
             }
         }
 
-
         return option.map(DocumentVO::new).orElse(null);
     }
 

+ 7 - 0
doc-common/pom.xml

@@ -154,6 +154,13 @@
             <version>5.8.18</version>
         </dependency>
 
+        <!-- POI-word文件处理需要 -->
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-scratchpad</artifactId>
+            <version>5.2.2</version>
+        </dependency>
+
     </dependencies>
 
 </project>

+ 19 - 0
doc-common/src/main/java/com/doc/common/config/EsConfig.java

@@ -0,0 +1,19 @@
+package com.doc.common.config;
+
+import org.springframework.stereotype.Component;
+
+/**
+ * Carries the Elasticsearch index name that repository calls made by the current thread should target.
+ *
+ * <p>Callers set the name right before using the repository. The value is kept per thread
+ * so that concurrent requests, which all share this one bean, cannot overwrite each
+ * other's index name between the {@code setIndexName} call and the repository call.
+ *
+ * @author wukai
+ */
+@Component(value = "esConfig")
+public class EsConfig {
+    /** Index name selected by the current thread; {@code null} until that thread sets one. */
+    private final ThreadLocal<String> indexName = new ThreadLocal<>();
+
+    /**
+     * Selects the index for subsequent repository calls on the current thread.
+     *
+     * @param indexName index name to use
+     */
+    public void setIndexName(String indexName) {
+        this.indexName.set(indexName);
+    }
+
+    /**
+     * Returns the index name last set by the current thread.
+     *
+     * @return the index name, or {@code null} if the current thread has not set one
+     */
+    public String getIndexName() {
+        return indexName.get();
+    }
+}

+ 128 - 0
doc-common/src/main/java/com/doc/common/utils/FileContentUtils.java

@@ -0,0 +1,128 @@
+package com.doc.common.utils;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+/**
+ * Extracts plain text from the raw bytes of a document.
+ * Supported formats: WPS, Word ({@code .doc} / {@code .docx}) and TXT.
+ *
+ * <p>Every method returns an empty string instead of throwing when the content cannot be read.
+ *
+ * @author wukai
+ */
+public class FileContentUtils {
+
+    /** Paragraphs starting with this prefix are treated as indented and appended unchanged. */
+    private static final String INDENT_PREFIX = "    ";
+
+    /**
+     * Extracts the body text of a Word 2007+ ({@code .docx}) document.
+     *
+     * @param data raw file bytes
+     * @return the extracted text, or an empty string if the document cannot be read
+     */
+    public static String getContentDocx(byte[] data) {
+        // The document is declared as a resource so it is closed on every path.
+        try (InputStream is = new ByteArrayInputStream(data);
+             XWPFDocument xwpf = new XWPFDocument(is)) {
+            StringBuilder content = new StringBuilder();
+            for (XWPFParagraph paragraph : xwpf.getParagraphs()) {
+                appendParagraph(content, paragraph.getParagraphText());
+            }
+            return content.toString();
+        } catch (Exception e) {
+            e.printStackTrace();
+            return "";
+        }
+    }
+
+    /**
+     * Extracts the body text of a Word 2003 ({@code .doc}) document.
+     *
+     * @param data raw file bytes
+     * @return the extracted text, or an empty string if the document cannot be read
+     */
+    public static String getContentDoc(byte[] data) {
+        try (InputStream is = new ByteArrayInputStream(data);
+             WordExtractor wordExtractor = new WordExtractor(is)) {
+            // Paragraph indentation is not available here; only the text is returned.
+            return joinParagraphs(wordExtractor.getParagraphText());
+        } catch (IllegalArgumentException e) {
+            // Compatibility: OnlyOffice online editing saves 2003 documents in the Word 2007 format.
+            return getContentDocx(data);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return "";
+        }
+    }
+
+    /**
+     * Extracts the body text of a WPS document.
+     *
+     * @param data raw file bytes
+     * @return the extracted text, or an empty string if the document cannot be read
+     */
+    public static String getContentWps(byte[] data) {
+        try (InputStream is = new ByteArrayInputStream(data);
+             HWPFDocument hwpf = new HWPFDocument(is);
+             WordExtractor extractor = new WordExtractor(hwpf)) {
+            return joinParagraphs(extractor.getParagraphText());
+        } catch (Exception e) {
+            e.printStackTrace();
+            return "";
+        }
+    }
+
+    /**
+     * Decodes a plain-text file as UTF-8 and trims surrounding whitespace.
+     *
+     * @param data raw file bytes
+     * @return the decoded text, or an empty string if {@code data} is {@code null}
+     */
+    public static String getContentTxt(byte[] data) {
+        if (data == null) {
+            // Match the other extractors, which return "" for unreadable input.
+            return "";
+        }
+        return new String(data, StandardCharsets.UTF_8).trim();
+    }
+
+    /** Joins the paragraphs into one string; a {@code null} or empty array yields an empty string. */
+    private static String joinParagraphs(String[] paragraphs) {
+        StringBuilder content = new StringBuilder();
+        if (paragraphs != null) {
+            for (String paragraph : paragraphs) {
+                appendParagraph(content, paragraph);
+            }
+        }
+        return content.toString();
+    }
+
+    /** Appends an indented paragraph unchanged, or any other paragraph trimmed and followed by CRLF. */
+    private static void appendParagraph(StringBuilder content, String text) {
+        if (text.startsWith(INDENT_PREFIX)) {
+            content.append(text);
+        } else {
+            content.append(text.trim()).append("\r\n");
+        }
+    }
+}