文档展示:使用POI转换DOC为HTML
程序员文章站
2022-03-27 11:58:04
...
上接 文档展示:使用OpenOffice转换文档为HTML
http://zhuyufufu.iteye.com/admin/blogs/2012476
本文研究如何使用 POI 将 DOC 文档转换为 HTML,并将其转换效果与 OpenOffice 的转换效果进行比较。
上代码:
转换的效果不如OpenOffice,有部分乱码
http://zhuyufufu.iteye.com/admin/blogs/2012476
这篇文章研究使用POI转文档为HTML,并比较其与OpenOffice转换的效果
上代码:
package com.zas.doc.demo; import java.io.BufferedWriter; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; import java.util.List; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.PicturesManager; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hwpf.usermodel.PictureType; import org.w3c.dom.Document; /** * @author zas */ public class PoiWordToHtml { /** * 转换doc为HTML * @param path * @param file * @throws Throwable */ public static void convert(String path, String file) throws Throwable { InputStream input = new FileInputStream (path + file); HWPFDocument wordDocument = new HWPFDocument (input); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter (DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() ); wordToHtmlConverter.setPicturesManager (new PicturesManager() { public String savePicture (byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) { return suggestedName; } }); wordToHtmlConverter.processDocument (wordDocument); List<?> pics = wordDocument.getPicturesTable().getAllPictures(); if (pics != null) { for (int i = 0; i < pics.size(); i++) { Picture pic = (Picture) pics.get (i); try { pic.writeImageContent (new FileOutputStream (path + pic.suggestFullFileName() ) ); } catch (FileNotFoundException e) { e.printStackTrace(); } } } Document htmlDocument = wordToHtmlConverter.getDocument(); ByteArrayOutputStream outStream = 
new ByteArrayOutputStream(); DOMSource domSource = new DOMSource (htmlDocument); StreamResult streamResult = new StreamResult (outStream); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty (OutputKeys.ENCODING, "gbk"); serializer.setOutputProperty (OutputKeys.INDENT, "yes"); serializer.setOutputProperty (OutputKeys.METHOD, "html"); serializer.transform (domSource, streamResult); outStream.close(); String content = new String (outStream.toByteArray() ); writeFile (content, path + "1111111111.html", "gbk"); } /** * 写文件 * @param content * @param path * @param encode */ public static void writeFile(String content, String path, String encode) { FileOutputStream fos = null; BufferedWriter bw = null; try { File file = new File(path); fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos, encode)); bw.write(content); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if (bw != null){ bw.close(); } if (fos != null){ fos.close(); } } catch (IOException ie) { ie.printStackTrace(); } } } public static void main (String[] args) throws Throwable { final String path = "D:/pdf/poi/"; final String file = "2010110档案管理系统需求分析说明书正式.doc"; // final String file = "转换用.ppt"; // final String file = "专业与院系对应关系.xls"; PoiWordToHtml.convert(path, file); } }
结论:POI 的转换效果不如 OpenOffice,生成的 HTML 中有部分乱码。