POI读取word文件,(支持HWPF和XWPF两种方式,即 .doc 与 .docx)
程序员文章站
2022-07-13 12:59:03
...
POI读取word文件,(支持HWPF和XWPF两种方式,即 .doc 与 .docx)
1.引入Maven依赖(以下三个POI依赖的版本必须保持一致)
<!-- All three Apache POI artifacts below are pinned to the same version (4.1.1). -->
<!-- Core POI artifact. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.1</version>
</dependency>
<!-- OOXML support; per the Apache POI component map this provides the XWPF (.docx) classes (verify for your version). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.1</version>
</dependency>
<!-- Scratchpad; per the Apache POI component map this provides the HWPF (.doc) classes (verify for your version). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.1</version>
</dependency>
2.读取word
/**
 * Utility for extracting plain text from Word documents with Apache POI.
 *
 * <p>Both the OOXML format (.docx, read through XWPF) and the legacy binary
 * format (.doc, read through HWPF) are supported.
 */
public class POIUtil {

    /**
     * Reads the text content of a Word document.
     *
     * <p>The file is first opened as an OOXML (.docx) document. If POI reports
     * that it is actually an OLE2 container, it is re-opened as a legacy
     * binary (.doc) document.
     *
     * @param path path of the Word file to read
     * @return the document text, or {@code null} when {@code path} does not
     *         point to an existing regular file
     * @throws Exception if the file cannot be read or parsed in either format
     */
    public static String readWord(String path) throws Exception {
        File file = new File(path);
        if (!file.exists() || !file.isFile()) {
            return null;
        }
        try {
            return readDocx(file);
        } catch (OLE2NotOfficeXmlFileException e) {
            // Older Word format (.doc): retry with the HWPF reader on a fresh stream.
            return readDoc(file);
        }
    }

    /**
     * Extracts the text of an OOXML (.docx) file.
     *
     * <p>Embedded pictures are not extracted here; they are available through
     * {@code XWPFDocument.getAllPictures()} if needed.
     */
    private static String readDocx(File file) throws IOException {
        // Resources are closed in reverse order: extractor, document, stream.
        try (InputStream is = new FileInputStream(file);
             XWPFDocument document = new XWPFDocument(is);
             POIXMLTextExtractor extractor = new XWPFWordExtractor(document)) {
            return extractor.getText();
        }
    }

    /** Extracts the text of a legacy binary (.doc) file. */
    private static String readDoc(File file) throws IOException {
        // Resources are closed in reverse order: extractor, document, stream.
        try (InputStream is = new FileInputStream(file);
             HWPFDocument document = new HWPFDocument(is);
             WordExtractor extractor = new WordExtractor(document)) {
            return extractor.getText();
        }
    }

    /**
     * Demo entry point: prints the text of a Word file.
     *
     * @param args optional; {@code args[0]} is the file to read. When absent,
     *             a sample path is used.
     */
    public static void main(String[] args) {
        // Other sample inputs used while testing:
        //   /Users/jj/Desktop/测试1 2.doc
        //   /Users/jj/Desktop/测试1.docx
        String path = args.length > 0 ? args[0] : "/Users/jj/Desktop/胜多负少的范德萨.doc";
        try {
            System.out.println(readWord(path));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}