Java实现将word转换为html的方法示例【doc与docx格式】

程序员文章站 2024-02-14 19:41:10

本文实例讲述了Java实现将Word转换为HTML的方法。分享给大家供大家参考，具体如下： public static void main(String[] a...

本文实例讲述了Java实现将Word（doc与docx格式）转换为HTML的方法。分享给大家供大家参考，具体如下：

 /**
  * Batch-converts the Word documents found directly inside a fixed directory to
  * {@code .htm} files written into that same directory.
  *
  * <p>Files ending in {@code .docx} (any letter case) go through {@code docx};
  * files ending in {@code .doc} go through {@code dox}. Sub-directories and files
  * with any other extension are skipped, so previously generated {@code .htm}
  * files or extracted images are never fed to a Word parser.
  *
  * @param args unused
  * @throws Exception if the directory cannot be listed or a conversion fails
  */
 public static void main(String[] args) throws Exception {
     String filepath = "c:/users/administrator/desktop/92个诊疗方案及临床路径/";
     File[] files = new File(filepath).listFiles();
     if (files == null) {
         // listFiles() returns null when the path is not a directory or cannot be read.
         throw new IllegalStateException("Cannot list directory: " + filepath);
     }
     for (File entry : files) {
         if (!entry.isFile()) {
             continue;
         }
         String fileName = entry.getName();
         // Compare on a lower-cased copy so ".DOCX" / ".Doc" are recognised as well.
         String lowerName = fileName.toLowerCase(java.util.Locale.ROOT);
         boolean isDocx = lowerName.endsWith(".docx");
         boolean isDoc = lowerName.endsWith(".doc");
         if (!isDocx && !isDoc) {
             continue;
         }
         System.out.println(fileName);
         // A matching extension guarantees that the name contains a '.'.
         String htmlName = fileName.substring(0, fileName.lastIndexOf('.')) + ".htm";
         // NOTE(review): the class name is reproduced exactly as it appears in the
         // source listing; confirm its real casing against the class declaration.
         if (isDocx) {
             casehtm.docx(filepath, fileName, htmlName);
         } else {
             casehtm.dox(filepath, fileName, htmlName);
         }
     }
 }
 /**
  * Converts a {@code .docx} file to an XHTML file.
  *
  * @param filepath directory prefix; it is concatenated directly with the file
  *                 names, so it must end with a path separator. It is also the
  *                 folder handed to the image extractor and URI resolver.
  * @param filename name of the source {@code .docx} file inside {@code filepath}
  * @param htmlname name of the HTML file to create inside {@code filepath}
  * @throws Exception if the document cannot be read, converted, or written
  */
 public static void docx(String filepath, String filename, String htmlname) throws Exception {
     // XHTML options: the URI resolver / image extractor decide where images are stored.
     File imageFolder = new File(filepath);
     XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolder));
     options.setExtractor(new FileImageExtractor(imageFolder));
     options.setIgnoreStylesIfUnused(false);
     options.setFragment(true);

     // try-with-resources closes both streams even when loading or converting fails.
     try (InputStream in = new FileInputStream(new File(filepath + filename))) {
         // Load the Word document into an XWPFDocument.
         XWPFDocument document = new XWPFDocument(in);
         // Open the target only after the source parsed, so a broken input
         // does not leave an empty HTML file behind.
         try (OutputStream out = new FileOutputStream(new File(filepath + htmlname))) {
             // Convert the XWPFDocument to XHTML.
             XHTMLConverter.getInstance().convert(document, out, options);
         }
     }
 }
 /**
  * Converts a binary {@code .doc} file to an HTML file.
  *
  * @param filepath directory prefix; it is concatenated directly with the file
  *                 names, so it must end with a path separator
  * @param filename name of the source {@code .doc} file inside {@code filepath}
  * @param htmlname name of the HTML file to create inside {@code filepath}
  * @throws Exception if the document cannot be read, converted, or written
  */
 public static void dox(String filepath, String filename, String htmlname) throws Exception {
     // try-with-resources closes the source stream even when parsing or writing fails.
     try (InputStream input = new FileInputStream(new File(filepath + filename))) {
         HWPFDocument wordDocument = new HWPFDocument(input);
         WordToHtmlConverter converter = new WordToHtmlConverter(
                 DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
         // Parse the Word document into the converter's DOM tree.
         converter.processDocument(wordDocument);
         Document htmlDocument = converter.getDocument();

         Transformer serializer = TransformerFactory.newInstance().newTransformer();
         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
         serializer.setOutputProperty(OutputKeys.INDENT, "yes");
         serializer.setOutputProperty(OutputKeys.METHOD, "html");

         // The target is opened last and closed on every path, including failures.
         try (OutputStream outStream = new FileOutputStream(new File(filepath + htmlname))) {
             serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
         }
     }
 }

<!-- Maven dependencies used by the conversion code above. -->
<dependency>
  <groupId>fr.opensagres.xdocreport</groupId>
  <artifactId>fr.opensagres.xdocreport.document</artifactId>
  <version>1.0.5</version>
</dependency>
<dependency>
  <groupId>fr.opensagres.xdocreport</groupId>
  <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
  <version>1.0.5</version>
</dependency>
<dependency>
  <groupId>org.apache.poi</groupId>
  <artifactId>poi</artifactId>
  <version>3.12</version>
</dependency>
<dependency>
  <groupId>org.apache.poi</groupId>
  <artifactId>poi-scratchpad</artifactId>
  <version>3.12</version>
</dependency>

对更多Java算法相关内容感兴趣的读者可查看本站专题：《Java文件与目录操作技巧汇总》、《Java数据结构与算法教程》、《Java操作DOM节点技巧总结》和《Java缓存操作技巧汇总》

希望本文所述对大家Java程序设计有所帮助。

上一篇： Java实现批量修改txt文件名称的方法示例

下一篇： Java JDBC连接数据库常见操作总结