java实现word文件转html文件
程序员文章站
2024-03-04 17:19:42
最近在项目开发中用户提出要在电脑上没有装office时在浏览器中打开word文件,最后确定的逻辑:用户选择想要查看的文件,页面js判断文件是否为word。不是执行下载,是后...
最近在项目开发中,用户提出要在电脑上没有安装 Office 的情况下,在浏览器中打开 Word 文件。最后确定的逻辑是:用户选择想要查看的文件,页面 JS 判断该文件是否为 Word 文件;如果不是,则直接执行下载;如果是,则由后端根据 Word 文件的后缀调用对应的转换方法。若对应的 HTML 文件已经存在,则直接返回该 HTML 文件的地址;若不存在,则先生成对应的 HTML 文件,再返回其地址。JS 直接通过 open() 打开新的页签,展示 Word 文件的内容。新人一枚,如果代码中存在错误或有更好的实现,万望指正!
相关jar包
代码
import java.io.bytearrayoutputstream; import java.io.file; import java.io.fileinputstream; import java.io.filenotfoundexception; import java.io.fileoutputstream; import java.io.ioexception; import java.io.inputstream; import java.io.outputstream; import javax.xml.parsers.documentbuilderfactory; import javax.xml.parsers.parserconfigurationexception; import javax.xml.transform.outputkeys; import javax.xml.transform.transformer; import javax.xml.transform.transformerexception; import javax.xml.transform.transformerfactory; import javax.xml.transform.dom.domsource; import javax.xml.transform.stream.streamresult; import org.apache.poi.hwpf.hwpfdocument; import org.apache.poi.hwpf.converter.picturesmanager; import org.apache.poi.hwpf.converter.wordtohtmlconverter; import org.apache.poi.hwpf.usermodel.picturetype; import org.apache.poi.xwpf.converter.core.basicuriresolver; import org.apache.poi.xwpf.converter.core.fileimageextractor; import org.apache.poi.xwpf.converter.core.fileuriresolver; import org.apache.poi.xwpf.converter.xhtml.xhtmlconverter; import org.apache.poi.xwpf.converter.xhtml.xhtmloptions; import org.apache.poi.xwpf.usermodel.xwpfdocument; import org.w3c.dom.document; /** * word 转换成html 2017-2-27 */ public class wordtohtml { /** * 将word2003转换为html文件 2017-2-27 * @param wordpath word文件路径 * @param wordname word文件名称无后缀 * @param suffix word文件后缀 * @throws ioexception * @throws transformerexception * @throws parserconfigurationexception */ public string word2003tohtml(string wordpath,string wordname,string suffix) throws ioexception, transformerexception, parserconfigurationexception { string htmlpath = wordpath + file.separator + wordname + "_show" + file.separator; string htmlname = wordname + ".html"; final string imagepath = htmlpath + "image" + file.separator; //判断html文件是否存在 file htmlfile = new file(htmlpath + htmlname); if(htmlfile.exists()){ return htmlfile.getabsolutepath(); } //原word文档 final string file = wordpath + file.separator + wordname + suffix; 
inputstream input = new fileinputstream(new file(file)); hwpfdocument worddocument = new hwpfdocument(input); wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter(documentbuilderfactory.newinstance().newdocumentbuilder().newdocument()); //设置图片存放的位置 wordtohtmlconverter.setpicturesmanager(new picturesmanager() { public string savepicture(byte[] content, picturetype picturetype, string suggestedname, float widthinches, float heightinches) { file imgpath = new file(imagepath); if(!imgpath.exists()){//图片目录不存在则创建 imgpath.mkdirs(); } file file = new file(imagepath + suggestedname); try { outputstream os = new fileoutputstream(file); os.write(content); os.close(); } catch (filenotfoundexception e) { e.printstacktrace(); } catch (ioexception e) { e.printstacktrace(); } //图片在html文件上的路径 相对路径 return "image/" + suggestedname; } }); //解析word文档 wordtohtmlconverter.processdocument(worddocument); document htmldocument = wordtohtmlconverter.getdocument(); //生成html文件上级文件夹 file folder = new file(htmlpath); if(!folder.exists()){ folder.mkdirs(); } //生成html文件地址 outputstream outstream = new fileoutputstream(htmlfile); domsource domsource = new domsource(htmldocument); streamresult streamresult = new streamresult(outstream); transformerfactory factory = transformerfactory.newinstance(); transformer serializer = factory.newtransformer(); serializer.setoutputproperty(outputkeys.encoding, "utf-8"); serializer.setoutputproperty(outputkeys.indent, "yes"); serializer.setoutputproperty(outputkeys.method, "html"); serializer.transform(domsource, streamresult); outstream.close(); return htmlfile.getabsolutepath(); } /** * 2007版本word转换成html 2017-2-27 * @param wordpath word文件路径 * @param wordname word文件名称无后缀 * @param suffix word文件后缀 * @return * @throws ioexception */ public string word2007tohtml(string wordpath,string wordname,string suffix) throws ioexception { string htmlpath = wordpath + file.separator + wordname + "_show" + file.separator; string htmlname = wordname + ".html"; string 
imagepath = htmlpath + "image" + file.separator; //判断html文件是否存在 file htmlfile = new file(htmlpath + htmlname); if(htmlfile.exists()){ return htmlfile.getabsolutepath(); } //word文件 file wordfile = new file(wordpath + file.separator + wordname + suffix); // 1) 加载word文档生成 xwpfdocument对象 inputstream in = new fileinputstream(wordfile); xwpfdocument document = new xwpfdocument(in); // 2) 解析 xhtml配置 (这里设置iuriresolver来设置图片存放的目录) file imgfolder = new file(imagepath); xhtmloptions options = xhtmloptions.create(); options.setextractor(new fileimageextractor(imgfolder)); //html中图片的路径 相对路径 options.uriresolver(new basicuriresolver("image")); options.setignorestylesifunused(false); options.setfragment(true); // 3) 将 xwpfdocument转换成xhtml //生成html文件上级文件夹 file folder = new file(htmlpath); if(!folder.exists()){ folder.mkdirs(); } outputstream out = new fileoutputstream(htmlfile); xhtmlconverter.getinstance().convert(document, out, options); return htmlfile.getabsolutepath(); } }
文件目录:
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。