Java 使用 POI 将 Word 转换为 HTML
程序员文章站
2024-03-09 15:51:11
使用 Apache POI 将 Word 文档转换为 HTML,支持 .doc 和 .docx 两种格式,转换后可以保留图片和样式。
1. 引入 Maven 依赖
...
使用 Apache POI 将 Word 文档转换为 HTML,支持 .doc 和 .docx 两种格式,转换后可以保留图片和样式。
1. 引入 Maven 依赖
<!-- Dependencies used by the conversion code in this article.
     Maven POM element names are case-sensitive: groupId / artifactId. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>
2. 转换代码
import org.apache.poi.hwpf.hwpfdocument; import org.apache.poi.hwpf.converter.wordtohtmlconverter; import org.apache.poi.xwpf.converter.core.basicuriresolver; import org.apache.poi.xwpf.converter.core.fileimageextractor; import org.apache.poi.xwpf.converter.xhtml.xhtmlconverter; import org.apache.poi.xwpf.converter.xhtml.xhtmloptions; import org.apache.poi.xwpf.usermodel.xwpfdocument; import org.w3c.dom.document; import javax.xml.parsers.documentbuilderfactory; import javax.xml.transform.outputkeys; import javax.xml.transform.transformer; import javax.xml.transform.transformerfactory; import javax.xml.transform.dom.domsource; import javax.xml.transform.stream.streamresult; import java.io.file; import java.io.fileinputstream; import java.io.fileoutputstream; import java.io.outputstreamwriter; public class test { // doc转换为html void doctohtml() throws exception { string sourcefilename = "c:\\doc\\test.doc"; string targetfilename = "c:\\html\\test.html"; string imagepathstr = "c:\\html\\image\\"; hwpfdocument worddocument = new hwpfdocument(new fileinputstream(sourcefilename)); document document = documentbuilderfactory.newinstance().newdocumentbuilder().newdocument(); wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter(document); // 保存图片,并返回图片的相对路径 wordtohtmlconverter.setpicturesmanager((content, picturetype, name, width, height) -> { try(fileoutputstream out = new fileoutputstream(imagepathstr + name)){ out.write(content); } catch (exception e) { e.printstacktrace(); } return "image/" + name; }); wordtohtmlconverter.processdocument(worddocument); document htmldocument = wordtohtmlconverter.getdocument(); domsource domsource = new domsource(htmldocument); streamresult streamresult = new streamresult(new file(targetfilename)); transformerfactory tf = transformerfactory.newinstance(); transformer serializer = tf.newtransformer(); serializer.setoutputproperty(outputkeys.encoding, "utf-8"); serializer.setoutputproperty(outputkeys.indent, "yes"); 
serializer.setoutputproperty(outputkeys.method, "html"); serializer.transform(domsource, streamresult); } // docx转换为html public void docxtohtml() throws exception { string sourcefilename = "d:\\ac\\00.docx"; string targetfilename = "d:\\ac\\test.html"; string imagepathstr = "d:\\ac\\image\\"; outputstreamwriter outputstreamwriter = null; try { xwpfdocument document = new xwpfdocument(new fileinputstream(sourcefilename)); xhtmloptions options = xhtmloptions.create(); // 存放图片的文件夹 options.setextractor(new fileimageextractor(new file(imagepathstr))); // html中图片的路径 options.uriresolver(new basicuriresolver("image")); outputstreamwriter = new outputstreamwriter(new fileoutputstream(targetfilename), "utf-8"); xhtmlconverter xhtmlconverter = (xhtmlconverter) xhtmlconverter.getinstance(); xhtmlconverter.convert(document, outputstreamwriter, options); } finally { if (outputstreamwriter != null) { outputstreamwriter.close(); } } }
演示地址:
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
上一篇: java加密算法--MD5加密和哈希散列带秘钥加密算法源码
下一篇: java简单操作word实例