浏览代码

解决扫描相关一切问题

wukai 2 年之前
父节点
当前提交
8766ea177b

+ 114 - 0
doc-admin/src/test/java/com/test/ImageToPdfConverter.java

@@ -0,0 +1,114 @@
+package com.test;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+public class ImageToPdfConverter {
+
+    /**
+     * Manual driver: merges every regular file found in the local {@code img1}
+     * directory into one PDF (one A4 page per image) and saves it to a temp file.
+     *
+     * @param args unused
+     * @throws IOException if the image directory cannot be listed or the PDF cannot be saved
+     */
+    public static void main(String[] args) throws IOException {
+        String path = "D:\\SYSTEM\\Desktop\\temp\\pdf";
+
+        // 1. Collect the candidate image files from the local directory.
+        File dir = new File(path + File.separator + "img1");
+        File[] fileList = dir.listFiles();
+        if (fileList == null) {
+            // listFiles() returns null when the path is not a directory or cannot be read.
+            throw new IOException("Cannot list image directory: " + dir);
+        }
+
+        // 2. Build the document; try-with-resources closes it even when a step fails.
+        try (PDDocument document = new PDDocument()) {
+            for (File file : fileList) {
+                if (file.isFile()) {
+                    // 3. One page per regular file; sub-directories are skipped.
+                    add(document, file.getPath());
+                }
+            }
+            // 4. Save the PDF. createTempFile already guarantees the parent directory exists.
+            File outputFile = File.createTempFile("doc", ".pdf");
+            document.save(outputFile);
+        }
+    }
+
+//    public static void
+
+    /**
+     * Appends one page whose size equals the image's width and height and draws
+     * the image on it. Failures while drawing are printed and otherwise ignored.
+     *
+     * @param document target document
+     * @param image    image already bound to {@code document}
+     */
+    private static void add(PDDocument document, PDImageXObject image) {
+        // Size the page from the image instead of using a fixed paper format.
+        PDRectangle bounds = new PDRectangle(image.getWidth(), image.getHeight());
+        PDPage page = new PDPage(bounds);
+        document.addPage(page);
+        try (PDPageContentStream canvas = new PDPageContentStream(document, page)) {
+            float pageW = bounds.getWidth();
+            float pageH = bounds.getHeight();
+            float imgW = image.getWidth();
+            float imgH = image.getHeight();
+            // Uniform scale that fits the image inside the page, then center the result.
+            float ratio = Math.min(pageW / imgW, pageH / imgH);
+            float drawW = imgW * ratio;
+            float drawH = imgH * ratio;
+            float left = (pageW - drawW) / 2;
+            float bottom = (pageH - drawH) / 2;
+            canvas.drawImage(image, left, bottom, drawW, drawH);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    private static void add(PDDocument document, String path) throws IOException {
+        // 这里是你生成PDF自适应图片大小,不设置会默认为A4
+        PDPage page = new PDPage(PDRectangle.A4);
+        document.addPage(page);
+
+        // 3.将 图片 添加进PDF document
+        try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
+            PDImageXObject image = PDImageXObject.createFromFile(path, document);
+            float pageWidth = PDRectangle.A4.getWidth();
+            float pageHeight = PDRectangle.A4.getHeight();
+            float imageWidth = image.getWidth();
+            float imageHeight = image.getHeight();
+            float scale = Math.min(pageWidth / imageWidth, pageHeight / imageHeight);
+            float scaledWidth = imageWidth * scale;
+            float scaledHeight = imageHeight * scale;
+            float x = (pageWidth - scaledWidth) / 2;
+            float y = (pageHeight - scaledHeight) / 2;
+            // 这里是将你的图片填充入pdf页
+            contentStream.drawImage(image, x, y, scaledWidth, scaledHeight);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Downloads the resource at {@code imageUrl} with an HTTP GET and returns the
+     * response body as bytes.
+     *
+     * @param imageUrl URL to fetch
+     * @return the raw response body
+     * @throws IOException if the request fails or the response carries no body
+     */
+    private static byte[] downloadImage(String imageUrl) throws IOException {
+        HttpGet httpGet = new HttpGet(imageUrl);
+        // The builder returns a closeable client; closing it releases its connections.
+        try (org.apache.http.impl.client.CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
+            HttpResponse response = httpClient.execute(httpGet);
+            HttpEntity entity = response.getEntity();
+            if (entity == null) {
+                throw new IOException("No response body for " + imageUrl);
+            }
+            // Close the body stream even when reading it fails.
+            try (InputStream inputStream = entity.getContent()) {
+                return IOUtils.toByteArray(inputStream);
+            }
+        }
+    }
+
+    /**
+     * Reads the whole file at {@code path} into memory.
+     *
+     * @param path path of the file to read
+     * @return the file's bytes
+     * @throws IOException if the file cannot be opened or read
+     */
+    private static byte[] getByte(String path) throws IOException {
+        try (FileInputStream in = new FileInputStream(new File(path))) {
+            return IOUtils.toByteArray(in);
+        }
+    }
+}

+ 126 - 0
doc-admin/src/test/java/com/test/PdfUtil.java

@@ -0,0 +1,126 @@
+package com.test;
+
+import com.doc.common.utils.encrypt.Sm2Util;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+import org.apache.pdfbox.rendering.ImageType;
+import org.apache.pdfbox.rendering.PDFRenderer;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class PdfUtil {
+    /**
+     * Manual driver: renders a local test PDF to PNG files and prints the
+     * generated paths to stderr when the conversion reports success.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        String path = "D:\\SYSTEM\\Desktop\\temp\\pdf";
+        String source = path + File.separator + "test.pdf";
+        Pair<Boolean, Object> outcome = pdfToImage(source, path + "\\img1\\", "再测", "png");
+        if (!outcome.getLeft()) {
+            return;
+        }
+        // On success the right-hand side holds the list of generated file paths.
+        List<Object> generated = (List<Object>) outcome.getRight();
+        for (Object entry : generated) {
+            System.err.println(entry);
+        }
+
+    }
+
+    /**
+     * Builds a PDF with one page per image and stores it in a temp file.
+     *
+     * @param sources SM2-encrypted absolute paths of the image files
+     * @return the generated PDF file, or {@code null} if anything failed
+     */
+    public static File imageToPdf(List<String> sources) {
+        try (PDDocument document = new PDDocument()) {
+            // Each entry is decrypted to a plain path before being added as a page.
+            for (String encrypted : sources) {
+                String plainPath = Sm2Util.decrypt(encrypted);
+                addPae(document, plainPath);
+            }
+            File target = File.createTempFile("doc", ".pdf");
+            document.save(target);
+            return target;
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Appends one page sized to the image and draws the image on it at its
+     * native size, so the page shows the whole image.
+     * <p>
+     * I/O failures are printed and swallowed; nothing is thrown to the caller.
+     *
+     * @param document target document
+     * @param path     path of the image file
+     */
+    private static void addPae(PDDocument document, String path) {
+        try {
+            PDImageXObject image = PDImageXObject.createFromFile(path, document);
+            // Size the page from the image; a fixed A4 page would clip larger images
+            // because the image is drawn unscaled from the origin.
+            PDRectangle pageSize = new PDRectangle(image.getWidth(), image.getHeight());
+            PDPage page = new PDPage(pageSize);
+            document.addPage(page);
+
+            try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
+                // Draw at the origin with the image's own width and height.
+                contentStream.drawImage(image, 0, 0);
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Renders every page of a PDF file to an image file at 192 DPI.
+     *
+     * @param source      path of the PDF file to render
+     * @param desFilePath directory that receives the images; created when missing
+     * @param desFileName base name of the images; for multi-page documents each file
+     *                    is named base name + underscore + page number starting at 1
+     * @param imageType   ImageIO format name, also used as the file extension
+     * @return a pair of {@code true} and the list of generated file paths on success,
+     *         or {@code false} and an error message on failure
+     */
+    public static Pair<Boolean, Object> pdfToImage(String source, String desFilePath, String desFileName, String imageType) {
+        File file = new File(source);
+        if (!file.exists()) {
+            return Pair.of(false, "文件不存在,无法转化");
+        }
+        // Create the output directory when it does not exist yet.
+        File destination = new File(desFilePath);
+        if (!destination.exists()) {
+            boolean flag = destination.mkdirs();
+            System.out.println("创建文件夹结果:" + flag);
+        }
+        try (PDDocument doc = PDDocument.load(file)) {
+            PDFRenderer renderer = new PDFRenderer(doc);
+            int pageCount = doc.getNumberOfPages();
+            System.out.println("文档一共" + pageCount + "页");
+            List<Object> fileList = new ArrayList<>();
+            for (int i = 0; i < pageCount; i++) {
+                // A single-page document keeps the plain name; otherwise append _1, _2, ...
+                String realFileName = pageCount > 1 ? desFileName + "_" + (i + 1) : desFileName;
+                BufferedImage bufferedImage = renderer.renderImageWithDPI(i, 96 * 2, ImageType.RGB);
+                String filePath = desFilePath + File.separator + realFileName + "." + imageType;
+                // ImageIO.write returns false (and writes nothing) when no writer exists
+                // for the requested format; report that instead of a path that was never written.
+                if (!ImageIO.write(bufferedImage, imageType, new File(filePath))) {
+                    return Pair.of(false, "PDF转化图片异常");
+                }
+                fileList.add(filePath);
+            }
+            return Pair.of(true, fileList);
+        } catch (IOException e) {
+            e.printStackTrace();
+            return Pair.of(false, "PDF转化图片异常");
+        }
+    }
+}
+

+ 21 - 1
doc-biz/src/main/java/com/doc/biz/service/impl/ElasticSearchServiceImpl.java

@@ -11,12 +11,17 @@ import com.doc.common.config.EsConfig;
 import com.doc.common.constant.Constants;
 import com.doc.common.utils.FileContentUtils;
 import com.doc.common.utils.StringUtils;
+import com.doc.common.utils.file.FileUtils;
+import com.doc.common.utils.file.PdfUtils;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Service;
+import org.springframework.web.multipart.MultipartFile;
 
 import javax.annotation.Resource;
+import java.io.File;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.function.Function;
 
@@ -141,7 +146,22 @@ public class ElasticSearchServiceImpl implements IElasticSearchService {
         } else if (handler != null) {
             DocumentVO vo = mongoService.downloadFile(fileId);
             if (vo != null) {
-                return handler.apply(vo.getData());
+                String result = handler.apply(vo.getData());
+                String pdf = ".pdf";
+                if (pdf.equals(fileType) && StringUtils.isEmpty(result)) {
+                    StringBuffer sb = new StringBuffer();
+                    //如果是PDF未解析成功,则另外进行解析工作
+                    List<File> list = PdfUtils.pdfToImage(vo.getData());
+                    list.forEach(file -> {
+                        MultipartFile f = FileUtils.getMultipartFile(file, file.getName());
+                        try {
+                            sb.append(ocrService.recognition(f));
+                        } catch (Exception e) {
+                            log.error("解析图片错误:{}", e.getMessage());
+                        }
+                    });
+                    return sb.toString();
+                }
             }
         }
         return "";

+ 38 - 14
doc-biz/src/main/java/com/doc/scanner/controller/ScannerInfoController.java

@@ -13,6 +13,7 @@ import com.doc.common.enums.EventLevel;
 import com.doc.common.utils.SecurityUtils;
 import com.doc.common.utils.encrypt.Sm2Util;
 import com.doc.common.utils.file.FileUtils;
+import com.doc.common.utils.file.PdfUtils;
 import com.doc.common.utils.poi.ExcelUtil;
 import com.doc.scanner.domain.ScannerFtp;
 import com.doc.scanner.domain.ScannerInfo;
@@ -119,6 +120,7 @@ public class ScannerInfoController extends BaseController {
         ScannerFtp ftp = scannerFtpService.selectScannerFtpByUserid("user" + scannerId);
         List<File> files = new ArrayList<>();
         findFileList(new File(ftp.getHomedirectory()), files);
+        files.sort(Comparator.comparing(File::lastModified));
         List<Map<String, String>> list = new ArrayList<>();
         for (File f : files) {
             File base = new File(RuoYiConfig.getProfile());
@@ -137,26 +139,48 @@ public class ScannerInfoController extends BaseController {
     @PostMapping(value = "/claim")
     @Log(title = "扫描仪管理", businessType = BusinessType.INSERT, eventLevel = EventLevel.MIDDLE)
     public AjaxResult claim(@RequestBody ScannerVO obj) {
-        AtomicInteger index = new AtomicInteger(1);
-        obj.getQ().forEach(q -> {
-            String path = Sm2Util.decrypt(q);
-            File f = new File(path);
-            try (FileInputStream input = new FileInputStream(f);) {
+        if (obj.getMerge()) {
+            //是否合并为PDF
+            File f = PdfUtils.imageToPdf(obj.getQ());
+            if (f != null && f.exists()) {
+                String name = obj.getName() + ".pdf";
+                claimUpload(f, name, obj.getSpaceId(), obj.getDirId());
+            } else {
+                return error("文件已被人认领!");
+            }
+        } else {
+            AtomicInteger index = new AtomicInteger(1);
+            obj.getQ().forEach(q -> {
+                String path = Sm2Util.decrypt(q);
+                File f = new File(path);
                 String name = index.get() == 1 ? obj.getName() : obj.getName() + index;
                 index.getAndIncrement();
                 name += path.substring(path.lastIndexOf("."));
-                MultipartFile file = FileUtils.getMultipartFile(input, name);
+                claimUpload(f, name, obj.getSpaceId(), obj.getDirId());
+            });
+        }
+        return success(1);
+    }
 
-                DocumentVO vo = mongoService.uploadFile(file);
-                docInfoService.upload(vo, obj.getSpaceId(), obj.getDirId());
+    /**
+     * Uploads a local file and then deletes it from disk.
+     * <p>
+     * The file is wrapped as a MultipartFile under {@code name}, stored through
+     * {@code mongoService.uploadFile}, registered through {@code docInfoService.upload}
+     * and finally removed with {@code FileUtils.deleteFile}. Any exception is caught
+     * and only its message is logged; the caller is not told about the failure.
+     * <p>
+     * NOTE(review): the delete runs while the input stream opened here is still
+     * inside its try-with-resources scope. Confirm this cannot fail on platforms
+     * that refuse to delete open files.
+     *
+     * @param f       the file to upload and delete
+     * @param name    the name given to the uploaded file
+     * @param spaceId space ID passed to {@code docInfoService.upload}
+     * @param dirId   directory ID passed to {@code docInfoService.upload}
+     */
+    private void claimUpload(File f, String name, Long spaceId, Long dirId) {
+        try (FileInputStream input = new FileInputStream(f)) {
+            MultipartFile file = FileUtils.getMultipartFile(input, name);
 
-                FileUtils.deleteFile(path);
-            } catch (Exception e) {
-                logger.error("认领出错啦:{}", e.getMessage());
-            }
+            DocumentVO vo = mongoService.uploadFile(file);
+            docInfoService.upload(vo, spaceId, dirId);
 
-        });
-        return success(1);
+            FileUtils.deleteFile(f.getPath());
+        } catch (Exception e) {
+            logger.error("认领出错啦:{}", e.getMessage());
+        }
     }
 
 

+ 1 - 1
doc-biz/src/main/resources/mapper/scanner/ScannerInfoMapper.xml

@@ -48,7 +48,7 @@
             <if test="scannerName != null  and scannerName != ''">and SCANNER_NAME like concat('%', #{scannerName},
                 '%')
             </if>
-            <if test="scannerIp != null  and scannerIp != ''">and SCANNER_IP = #{scannerIp}</if>
+            <if test="scannerIp != null  and scannerIp != ''">and SCANNER_IP like concat('%', #{scannerIp}, '%')</if>
             <if test="isPublic != null  and isPublic != ''">and IS_PUBLIC = #{isPublic}</if>
             <if test="userId != null  and userId != ''">and USER_ID = #{userId}</if>
             <if test="deptId != null ">and DEPT_ID = #{deptId}</if>

+ 3 - 3
doc-common/src/main/java/com/doc/common/utils/FileContentUtils.java

@@ -76,7 +76,7 @@ public class FileContentUtils {
      * @return
      */
     public static String getContentPdf(byte[] data) {
-        String result = "";
+        StringBuilder result = new StringBuilder();
         try {
             PDDocument load = PDDocument.load(data);
             PDFTextStripper stripper = new PDFTextStripper();
@@ -87,12 +87,12 @@ public class FileContentUtils {
                 stripper.setEndPage(i);
                 String text = stripper.getText(load);
                 //拼接不同页数的数据返回
-                result += text;
+                result.append(text);
             }
         } catch (IOException e) {
             e.printStackTrace();
         }
-        return result;
+        return result.toString();
     }
 
     /**

+ 16 - 0
doc-common/src/main/java/com/doc/common/utils/file/FileUtils.java

@@ -263,6 +263,22 @@ public class FileUtils {
     }
 
     /**
+     * Wraps a file on disk as a {@code MultipartFile}.
+     * <p>
+     * Opens a {@code FileInputStream} on {@code f} and delegates to the
+     * stream-based {@code getMultipartFile} overload. A missing or unopenable file
+     * is not reported: the {@code FileNotFoundException} is swallowed and
+     * {@code null} is returned, so callers must null-check the result.
+     * <p>
+     * NOTE(review): the stream opened here is not closed in this method. Confirm
+     * that the stream-based overload closes it, otherwise it leaks.
+     *
+     * @param f        the file to wrap
+     * @param fileName the name passed through to the stream-based overload
+     * @return the MultipartFile built by the overload, or {@code null} if the file
+     *         cannot be opened
+     */
+    public static MultipartFile getMultipartFile(File f, String fileName) {
+        try {
+            FileInputStream is = new FileInputStream(f);
+            return getMultipartFile(is, fileName);
+        } catch (FileNotFoundException ignored) {
+            return null;
+        }
+    }
+
+    /**
      * FileItem类对象创建
      *
      * @param inputStream inputStream

+ 127 - 0
doc-common/src/main/java/com/doc/common/utils/file/PdfUtils.java

@@ -0,0 +1,127 @@
+package com.doc.common.utils.file;
+
+import com.doc.common.utils.DateUtils;
+import com.doc.common.utils.encrypt.Sm2Util;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+import org.apache.pdfbox.rendering.ImageType;
+import org.apache.pdfbox.rendering.PDFRenderer;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * pdf与图片互转工具类
+ *
+ * @author wukai
+ */
+public class PdfUtils {
+    public static void main(String[] args) {
+//        Manual check only.
+//        Prints the value of the java.io.tmpdir system property to stderr,
+//        i.e. the base directory that pdfToImage builds its output path from.
+        System.err.println(System.getProperty("java.io.tmpdir"));
+    }
+
+    /**
+     * Merges image files into one PDF, one A4 page per image, and stores the
+     * result in a temp file.
+     * <p>
+     * Paths that do not exist are skipped. An image that cannot be read is printed
+     * and skipped without leaving a blank page. After the PDF is saved, only the
+     * source files that were actually embedded are deleted, so a scan that failed
+     * to merge stays on disk.
+     *
+     * @param sources SM2-encrypted absolute paths of the image files
+     * @return the generated PDF file, or {@code null} when no page could be
+     *         produced or an unexpected error occurred
+     */
+    public static File imageToPdf(List<String> sources) {
+        try (PDDocument document = new PDDocument()) {
+            // Plain paths of the images that made it into the document.
+            List<String> merged = new ArrayList<>();
+            for (String source : sources) {
+                String path = Sm2Util.decrypt(source);
+                if (!new File(path).exists()) {
+                    continue;
+                }
+                try {
+                    addImagePage(document, path);
+                    merged.add(path);
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+            if (document.getPages().getCount() > 0) {
+                File file = File.createTempFile("doc", ".pdf");
+                document.save(file);
+                // Delete sources only after a successful save, and only the embedded ones.
+                for (String path : merged) {
+                    FileUtils.deleteFile(path);
+                }
+                return file;
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return null;
+    }
+
+    /**
+     * Appends one A4 page showing the image at {@code path}, scaled uniformly to
+     * fit the page and centered. On failure the document is left without the page.
+     *
+     * @param document target document
+     * @param path     plain path of the image file
+     * @throws IOException if the image cannot be loaded or drawn
+     */
+    private static void addImagePage(PDDocument document, String path) throws IOException {
+        // Load first so an unreadable image never produces an empty page.
+        PDImageXObject image = PDImageXObject.createFromFile(path, document);
+        PDPage page = new PDPage(PDRectangle.A4);
+        document.addPage(page);
+        try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
+            float pageWidth = PDRectangle.A4.getWidth();
+            float pageHeight = PDRectangle.A4.getHeight();
+            float imageWidth = image.getWidth();
+            float imageHeight = image.getHeight();
+            float scale = Math.min(pageWidth / imageWidth, pageHeight / imageHeight);
+            float scaledWidth = imageWidth * scale;
+            float scaledHeight = imageHeight * scale;
+            float x = (pageWidth - scaledWidth) / 2;
+            float y = (pageHeight - scaledHeight) / 2;
+            contentStream.drawImage(image, x, y, scaledWidth, scaledHeight);
+        } catch (IOException e) {
+            // Drop the half-built page so the output contains no blank page.
+            document.removePage(page);
+            throw e;
+        }
+    }
+
+    /**
+     * Renders every page of a PDF to a PNG file at 192 DPI inside a freshly
+     * created temp directory.
+     * <p>
+     * Each call gets its own directory, so concurrent conversions cannot overwrite
+     * each other's files. The files are not removed here; cleaning them up is left
+     * to the caller.
+     *
+     * @param data raw bytes of the PDF
+     * @return the generated image files in page order; empty when the PDF cannot
+     *         be loaded, and partial when rendering fails midway
+     */
+    public static List<File> pdfToImage(byte[] data) {
+        String tmpName = "tmp";
+        String imageType = "png";
+        List<File> result = new ArrayList<>();
+
+        try (PDDocument doc = PDDocument.load(data)) {
+            // createTempDirectory appends a unique suffix to the prefix; the timestamp
+            // is kept only to make the directory easy to spot.
+            File tmpDir = Files.createTempDirectory(DateUtils.dateTimeNow() + "_").toFile();
+            PDFRenderer renderer = new PDFRenderer(doc);
+            int pageCount = doc.getNumberOfPages();
+            for (int i = 0; i < pageCount; i++) {
+                // A single-page document keeps the plain name; otherwise append _101, _102, ...
+                String realFileName = pageCount > 1 ? tmpName + "_" + (i + 101) : tmpName;
+                BufferedImage bufferedImage = renderer.renderImageWithDPI(i, 96 * 2, ImageType.RGB);
+                File f = new File(tmpDir, realFileName + "." + imageType);
+                // Only report files that were actually written.
+                if (ImageIO.write(bufferedImage, imageType, f)) {
+                    result.add(f);
+                }
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+
+        return result;
+    }
+}
+