欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页  >  IT编程

Java使用poi将word转换为html

程序员文章站 2024-03-09 15:51:11
使用poi将word转换为html,支持doc,docx,转换后可以保持图片、样式。 1.导入maven包 ...

使用 Apache POI 将 Word 文档转换为 HTML,支持 doc 和 docx 两种格式,转换后可以保留图片和样式。

1.导入maven包

<!-- Apache POI core -->
<dependency>
 <groupId>org.apache.poi</groupId>
 <artifactId>poi</artifactId>
 <version>3.14</version>
</dependency>
<!-- HWPF (legacy .doc) support -->
<dependency>
 <groupId>org.apache.poi</groupId>
 <artifactId>poi-scratchpad</artifactId>
 <version>3.14</version>
</dependency>
<!-- XWPF (.docx) support -->
<dependency>
 <groupId>org.apache.poi</groupId>
 <artifactId>poi-ooxml</artifactId>
 <version>3.14</version>
</dependency>
<!-- XDocReport: provides the XWPF to XHTML converter used for .docx -->
<dependency>
 <groupId>fr.opensagres.xdocreport</groupId>
 <artifactId>xdocreport</artifactId>
 <version>1.0.6</version>
</dependency>
<dependency>
 <groupId>org.apache.poi</groupId>
 <artifactId>poi-ooxml-schemas</artifactId>
 <version>3.14</version>
</dependency>
<dependency>
 <groupId>org.apache.poi</groupId>
 <artifactId>ooxml-schemas</artifactId>
 <version>1.3</version>
</dependency>

2.转换代码

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
 
public class test { 
  // doc转换为html 
  void doctohtml() throws exception { 
    string sourcefilename = "c:\\doc\\test.doc"; 
    string targetfilename = "c:\\html\\test.html"; 
    string imagepathstr = "c:\\html\\image\\"; 
    hwpfdocument worddocument = new hwpfdocument(new fileinputstream(sourcefilename)); 
    document document = documentbuilderfactory.newinstance().newdocumentbuilder().newdocument(); 
    wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter(document); 
    // 保存图片,并返回图片的相对路径 
    wordtohtmlconverter.setpicturesmanager((content, picturetype, name, width, height) -> { 
      try(fileoutputstream out = new fileoutputstream(imagepathstr + name)){ 
         out.write(content); 
      } catch (exception e) { 
        e.printstacktrace(); 
      }  
      return "image/" + name; 
    }); 
    wordtohtmlconverter.processdocument(worddocument); 
    document htmldocument = wordtohtmlconverter.getdocument(); 
    domsource domsource = new domsource(htmldocument); 
    streamresult streamresult = new streamresult(new file(targetfilename)); 
 
    transformerfactory tf = transformerfactory.newinstance(); 
    transformer serializer = tf.newtransformer(); 
    serializer.setoutputproperty(outputkeys.encoding, "utf-8"); 
    serializer.setoutputproperty(outputkeys.indent, "yes"); 
    serializer.setoutputproperty(outputkeys.method, "html"); 
    serializer.transform(domsource, streamresult); 
  } 
  // docx转换为html 
  public void docxtohtml() throws exception { 
    string sourcefilename = "d:\\ac\\00.docx"; 
    string targetfilename = "d:\\ac\\test.html"; 
    string imagepathstr = "d:\\ac\\image\\"; 
    outputstreamwriter outputstreamwriter = null; 
    try { 
      xwpfdocument document = new xwpfdocument(new fileinputstream(sourcefilename)); 
      xhtmloptions options = xhtmloptions.create(); 
      // 存放图片的文件夹 
      options.setextractor(new fileimageextractor(new file(imagepathstr))); 
      // html中图片的路径 
      options.uriresolver(new basicuriresolver("image")); 
      outputstreamwriter = new outputstreamwriter(new fileoutputstream(targetfilename), "utf-8"); 
      xhtmlconverter xhtmlconverter = (xhtmlconverter) xhtmlconverter.getinstance(); 
      xhtmlconverter.convert(document, outputstreamwriter, options); 
    } finally { 
      if (outputstreamwriter != null) { 
        outputstreamwriter.close(); 
      } 
    } 
  } 

演示地址:

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。