Java实现Word/Pdf/TXT转html的示例
程序员文章站
2022-03-28 17:42:14
引言: 最近公司在做一个教育培训学习及在线考试的项目,本人主要从事网络课程模块,主要做课程分类,课程,课件的创建及在线学习和统计的功能,因为课件涉及到多种类型,像视频,音频,图文,外部链接及文档类型...
引言:
最近公司在做一个教育培训学习及在线考试的项目,本人主要负责网络课程模块,包括课程分类、课程、课件的创建,以及在线学习和统计功能。课件涉及多种类型,如视频、音频、图文、外部链接及文档。其中有一个问题:文档型课件如何在网页上展示和学习。因为要在线统计学习的课程、学习的人员和学习的时长,所以不能像传统做法那样把文档下载到本地学习——那样就不受系统控制了。最终的方案是:在上传文档型课件时,将文件转换成对应的html文件,以便在网页上浏览学习。
下边主要针对word,pdf和txt文本文件进行转换
一:java实现将word转换为html
1:引入依赖
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.12</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.12</version>
</dependency>
2:代码demo
package com.svse.controller; import javax.xml.parsers.documentbuilderfactory; import javax.xml.parsers.parserconfigurationexception; import javax.xml.transform.outputkeys; import javax.xml.transform.transformer; import javax.xml.transform.transformerexception; import javax.xml.transform.transformerfactory; import javax.xml.transform.dom.domsource; import javax.xml.transform.stream.streamresult; import org.apache.poi.hwpf.hwpfdocument; import org.apache.poi.hwpf.converter.picturesmanager; import org.apache.poi.hwpf.converter.wordtohtmlconverter; import org.apache.poi.hwpf.usermodel.picturetype; import org.apache.poi.xwpf.converter.core.basicuriresolver; import org.apache.poi.xwpf.converter.core.fileimageextractor; import org.apache.poi.xwpf.converter.core.fileuriresolver; import org.apache.poi.xwpf.converter.core.iuriresolver; import org.apache.poi.xwpf.converter.core.ixwpfconverter; import org.apache.poi.xwpf.converter.xhtml.xhtmlconverter; import org.apache.poi.xwpf.converter.xhtml.xhtmloptions; import org.apache.poi.xwpf.usermodel.xwpfdocument; /** * word 转换成html */ public class testwordtohtml { public static final string storagepath="c://works//files//"; public static final string ip="192.168.30.222"; public static final string port="8010"; public static void main(string[] args) throws ioexception, transformerexception, parserconfigurationexception { testwordtohtml wt=new testwordtohtml(); //wt.word2003tohtml("甲骨文考证.doc"); wt.word2007tohtml("甲骨文考证.docx"); } /** * 2003版本word转换成html * @throws ioexception * @throws transformerexception * @throws parserconfigurationexception */ public void word2003tohtml(string filename) throws ioexception, transformerexception, parserconfigurationexception { final string imagepath = storagepath+"fileimage/";//解析时候如果doc文件中有图片 图片会保存在此路径 final string strranstring=getrandomnum(); string filepath =storagepath; string htmlname =filename.substring(0, filename.indexof("."))+ "2003.html"; final string file = filepath + filename; inputstream 
input = new fileinputstream(new file(file)); hwpfdocument worddocument = new hwpfdocument(input); wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter(documentbuilderfactory.newinstance().newdocumentbuilder().newdocument()); //设置图片存放的位置 wordtohtmlconverter.setpicturesmanager(new picturesmanager() { public string savepicture(byte[] content, picturetype picturetype, string suggestedname, float widthinches, float heightinches) { file imgpath = new file(imagepath); if(!imgpath.exists()){//图片目录不存在则创建 imgpath.mkdirs(); } file file = new file(imagepath +strranstring+suggestedname); try { outputstream os = new fileoutputstream(file); os.write(content); os.close(); } catch (filenotfoundexception e) { e.printstacktrace(); } catch (ioexception e) { e.printstacktrace(); } return "http://"+ip+":"+port+"//uploadfile/fileimage/"+strranstring+suggestedname; // return imagepath +strranstring+suggestedname; } }); //解析word文档 wordtohtmlconverter.processdocument(worddocument); document htmldocument = wordtohtmlconverter.getdocument(); file htmlfile = new file(filepath +strranstring+htmlname); outputstream outstream = new fileoutputstream(htmlfile); domsource domsource = new domsource(htmldocument); streamresult streamresult = new streamresult(outstream); transformerfactory factory = transformerfactory.newinstance(); transformer serializer = factory.newtransformer(); serializer.setoutputproperty(outputkeys.encoding, "utf-8"); serializer.setoutputproperty(outputkeys.indent, "yes"); serializer.setoutputproperty(outputkeys.method, "html"); serializer.transform(domsource, streamresult); outstream.close(); system.out.println("生成html文件路径:"+ "http://"+ip+":"+port+"//uploadfile/"+strranstring+htmlname); } /** * 2007版本word转换成html * @throws ioexception */ public void word2007tohtml(string filename) throws ioexception { final string strranstring=getrandomnum(); string filepath = storagepath+strranstring; string htmlname =filename.substring(0, filename.indexof("."))+ "2007.html"; file 
f = new file(storagepath+filename); if (!f.exists()) { system.out.println("sorry file does not exists!"); } else { if (f.getname().endswith(".docx") || f.getname().endswith(".docx")) { try { // 1) 加载word文档生成 xwpfdocument对象 inputstream in = new fileinputstream(f); xwpfdocument document = new xwpfdocument(in); // 2) 解析 xhtml配置 (这里设置iuriresolver来设置图片存放的目录) file imagefolderfile = new file(filepath); xhtmloptions options = xhtmloptions.create().uriresolver(new fileuriresolver(imagefolderfile)); options.setextractor(new fileimageextractor(imagefolderfile)); options.uriresolver(new iuriresolver() { public string resolve(string uri) { //http://192.168.30.222:8010//uploadfile/.... return "http://"+ip+":"+port+"//uploadfile/"+strranstring +"/"+ uri; } }); options.setignorestylesifunused(false); options.setfragment(true); // 3) 将 xwpfdocument转换成xhtml outputstream out = new fileoutputstream(new file(filepath + htmlname)); ixwpfconverter<xhtmloptions> converter = xhtmlconverter.getinstance(); converter.convert(document,out, options); //xhtmlconverter.getinstance().convert(document, out, options); system.out.println("html路径:"+"http://"+ip+":"+port+"//uploadfile/"+strranstring+htmlname); } catch (exception e) { e.printstacktrace(); } } else { system.out.println("enter only ms office 2007+ files"); } } } /** *功能说明:生成时间戳 *创建人:zsq *创建时间:2019年12月7日 下午2:37:09 * */ public static string getrandomnum(){ date dt = new date(); simpledateformat sdf = new simpledateformat("yyyymmddhhmmss"); string str=sdf.format(dt); return str; } }
二:java实现将pdf转换为html
1: 引入依赖
<dependency>
    <groupId>net.sf.cssbox</groupId>
    <artifactId>pdf2dom</artifactId>
    <version>1.7</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.12</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-tools</artifactId>
    <version>2.0.12</version>
</dependency>
2:代码demo
public class pdftohtml { /* pdf转换html */ public void pdftohtmltest(string inpdfpath,string outputhtmlpath) { // string outputpath = "c:\\works\\files\\zsq保密知识测试题库.html"; //try() 写在()里面会自动关闭流 try{ bufferedwriter out = new bufferedwriter(new outputstreamwriter(new fileoutputstream(new file(outputhtmlpath)),"utf-8")); //加载pdf文档 //pddocument document = pddocument.load(bytes); pddocument document = pddocument.load(new file(inpdfpath)); pdfdomtree pdfdomtree = new pdfdomtree(); pdfdomtree.writetext(document,out); } catch (exception e) { e.printstacktrace(); } } public static void main(string[] args) throws ioexception { pdftohtml ph=new pdftohtml(); string pdfpath="c:\\works\\files\\武研中心行政考勤制度.pdf"; string outputpath="c:\\works\\files\\武研中心行政考勤制度.html"; ph.pdftohtmltest(pdfpath,outputpath); } }
三:java实现将txt转换为html
/* * txt文档转html filepath:txt原文件路径 htmlposition:转化后生成的html路径 */ public static void txttohtml(string filepath, string htmlposition) { try { //string encoding = "gbk"; file file = new file(filepath); if (file.isfile() && file.exists()) { // 判断文件是否存在 inputstreamreader read = new inputstreamreader(new fileinputstream(file), "gbk"); // 考虑到编码格式 bufferedreader bufferedreader = new bufferedreader(read); // 写文件 fileoutputstream fos = new fileoutputstream(new file(htmlposition)); outputstreamwriter osw = new outputstreamwriter(fos, "gbk"); bufferedwriter bw = new bufferedwriter(osw); string linetxt = null; while ((linetxt = bufferedreader.readline()) != null) { bw.write("   "+linetxt + "</br>"); } bw.close(); osw.close(); fos.close(); read.close(); } else { system.out.println("找不到指定的文件"); } } catch (exception e) { system.out.println("读取文件内容出错"); e.printstacktrace(); } }
以上就是java实现word/pdf/txt转html的示例的详细内容,更多关于java word/pdf/txt转html的资料请关注其它相关文章!