|
@@ -4,6 +4,7 @@ import org.apache.pdfbox.pdmodel.PDDocument;
|
|
|
import org.apache.pdfbox.text.PDFTextStripper;
|
|
|
import org.apache.poi.hwpf.HWPFDocument;
|
|
|
import org.apache.poi.hwpf.extractor.WordExtractor;
|
|
|
+import org.apache.poi.ss.usermodel.Row;
|
|
|
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
|
|
import org.apache.poi.xslf.usermodel.XSLFShape;
|
|
|
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
|
@@ -14,10 +15,11 @@ import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
|
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
|
|
|
|
|
import java.io.ByteArrayInputStream;
|
|
|
-import java.io.FileInputStream;
|
|
|
import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
+import java.nio.file.Files;
|
|
|
+import java.nio.file.Paths;
|
|
|
import java.util.List;
|
|
|
|
|
|
/**
|
|
@@ -173,58 +175,66 @@ public class FileContentUtils {
|
|
|
return result.replace("\n\n", "\n");
|
|
|
}
|
|
|
|
|
|
- public static String getContentTxt(byte[] data) {
|
|
|
- return new String(data, StandardCharsets.UTF_8).trim();
|
|
|
-// try (InputStream is = new ByteArrayInputStream(data); BufferedReader br = new BufferedReader(data)) {
|
|
|
-// StringBuilder sb = new StringBuilder();
|
|
|
-// String line;
|
|
|
-// while ((line = br.readLine()) != null) {
|
|
|
-// sb.append(line).append(System.lineSeparator());
|
|
|
-// }
|
|
|
-// return sb.toString();
|
|
|
-// } catch (IOException e) {
|
|
|
-// e.printStackTrace();
|
|
|
-// return "";
|
|
|
-// }
|
|
|
- }
|
|
|
-
|
|
|
- public static void main(String[] args) {
|
|
|
- String path = "D:\\SYSTEM\\Desktop\\temp\\xxx.xlsx";
|
|
|
+ /**
|
|
|
+ * 获取正文文件内容,PDF方法
|
|
|
+ *
|
|
|
+ * @param data 二进制文件内容
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static String getContentExcel(byte[] data) {
|
|
|
String result = "";
|
|
|
- try {
|
|
|
+ try (InputStream is = new ByteArrayInputStream(data);
|
|
|
+ XSSFWorkbook xssfWorkbook = new XSSFWorkbook(is)) {
|
|
|
//创建工作簿对象
|
|
|
- XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(path));
|
|
|
//获取工作簿下sheet的个数
|
|
|
int sheetNum = xssfWorkbook.getNumberOfSheets();
|
|
|
- System.out.println("该excel文件中总共有:" + sheetNum + "个sheet");
|
|
|
//遍历工作簿中的所有数据
|
|
|
for (int i = 0; i < sheetNum; i++) {
|
|
|
//读取第i个工作表
|
|
|
- System.out.println("读取第" + (i + 1) + "个sheet");
|
|
|
XSSFSheet sheet = xssfWorkbook.getSheetAt(i);
|
|
|
//获取最后一行的num,即总行数。此处从0开始
|
|
|
int maxRow = sheet.getLastRowNum();
|
|
|
- System.err.println("总行数:" + maxRow);
|
|
|
- Thread.sleep(1000);
|
|
|
- for (int row = 0; row <= maxRow; row++) {
|
|
|
- //获取最后单元格num,即总单元格数 ***注意:此处从1开始计数***
|
|
|
- if (sheet.getRow(row) != null) {
|
|
|
- int maxRol = sheet.getRow(row).getLastCellNum();
|
|
|
- System.out.println("--------第" + row + "行的数据如下--------");
|
|
|
- for (int rol = 0; rol < maxRol; rol++) {
|
|
|
- System.out.print(sheet.getRow(row).getCell(rol) + " ");
|
|
|
+ for (int rowNum = 0; rowNum <= maxRow; rowNum++) {
|
|
|
+ Row row = sheet.getRow(rowNum);
|
|
|
+ if (row != null) {
|
|
|
+ //获取最后单元格num,即总单元格数 ***注意:此处从1开始计数***
|
|
|
+ int maxRol = row.getLastCellNum();
|
|
|
+ for (int cellNum = 0; cellNum < maxRol; cellNum++) {
|
|
|
+ result += row.getCell(cellNum) + " ";
|
|
|
}
|
|
|
- System.out.println();
|
|
|
+ result += "\n";
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
} catch (IOException e) {
|
|
|
e.printStackTrace();
|
|
|
- } catch (InterruptedException e) {
|
|
|
- throw new RuntimeException(e);
|
|
|
}
|
|
|
|
|
|
- System.err.println(result.replace("\n\n", "\n"));
|
|
|
+ return result.replace("\n\n", "\n");
|
|
|
+ }
|
|
|
+
|
|
|
+ public static String getContentTxt(byte[] data) {
|
|
|
+ return new String(data, StandardCharsets.UTF_8).trim();
|
|
|
+// try (InputStream is = new ByteArrayInputStream(data); BufferedReader br = new BufferedReader(data)) {
|
|
|
+// StringBuilder sb = new StringBuilder();
|
|
|
+// String line;
|
|
|
+// while ((line = br.readLine()) != null) {
|
|
|
+// sb.append(line).append(System.lineSeparator());
|
|
|
+// }
|
|
|
+// return sb.toString();
|
|
|
+// } catch (IOException e) {
|
|
|
+// e.printStackTrace();
|
|
|
+// return "";
|
|
|
+// }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void main(String[] args) throws Exception {
|
|
|
+ String path = "D:\\SYSTEM\\Desktop\\temp\\parse\\test.xlsx";
|
|
|
+
|
|
|
+ byte[] bytes = Files.readAllBytes(Paths.get(path));
|
|
|
+
|
|
|
+ String result = getContentExcel(bytes);
|
|
|
+ System.err.println(result);
|
|
|
}
|
|
|
}
|