word转HTML 基本版
程序员文章站
2022-04-09 22:12:07
...
同时支持doc和docx,话不多说,直接上代码
项目依赖 pom.xml
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>net.sf.jtidy</groupId>
<artifactId>jtidy</artifactId>
<version>r938</version>
</dependency>
正式代码是
package com.zbj.spring.boot.util;
import lombok.Cleanup;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
/**
* WordToHtml
*
* @author weigang
* @create 2017-10-13
**/
public class WordToHtml {
public static void main(String[] args) {
// String sourceFileName = "D:/test/员工劳动合同.docx";
String sourceFileName = "D:/test/员工劳动合同.doc";
try {
convertWordToHtml(sourceFileName);
} catch (IOException e) {
e.printStackTrace();
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (TransformerException e) {
e.printStackTrace();
}
}
public static void convertWordToHtml(String path) throws IOException, ParserConfigurationException, TransformerException {
String htmlPath = "D:/test/index.html";
String imagePath = "D:/test/image/";
if (StringUtils.isBlank(path)) {
return;
}
if (path.endsWith("docx")) { // 2007 及以后
XWPFDocument document = new XWPFDocument(new FileInputStream(path));
XHTMLOptions options = XHTMLOptions.create();
options.setExtractor(new FileImageExtractor(new File(imagePath)));
options.URIResolver(new BasicURIResolver("image"));
@Cleanup OutputStreamWriter streamWriter = new OutputStreamWriter(new FileOutputStream(htmlPath));
XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
xhtmlConverter.convert(document, streamWriter, options);
} else { // 2003 之前
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(path));
Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
// 保存图片,并返回图片的相对路径
wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
File imageFile = new File(imagePath);
if(!imageFile.exists()){
imageFile.mkdirs();
}
try (FileOutputStream out = new FileOutputStream(imagePath + name)) {
out.write(content);
} catch (Exception e) {
e.printStackTrace();
}
return "image/" + name;
});
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = wordToHtmlConverter.getDocument();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(new File(htmlPath));
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
}
}
}
对于word文件,在网上随便下载个合同或者文件即可
下一篇: Spring Boot初学者要知道的干货