POI 转换 HTML（PPT、Word、Excel、TXT，其中 Office 文档包含 03 和 07 两个版本）

程序员文章站 2022-04-09 21:42:08
...
package com.zjqy.qbcs.controller;

import com.microsoft.schemas.office.visio.x2012.main.CellType;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hslf.usermodel.HSLFShape;
import org.apache.poi.hslf.usermodel.HSLFSlide;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
import org.apache.poi.hslf.usermodel.HSLFTextRun;
import org.apache.poi.hslf.usermodel.HSLFTextShape;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
import org.apache.poi.xslf.usermodel.XSLFTextRun;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.List;

public class Aaa{
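/**
 * Maven dependencies needed to compile and run the converters in this class
 * (Apache POI 3.14 artifacts plus the XDocReport bundle):
 *
 <!--===============================================-->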
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>xdocreport</artifactId>
            <version>1.0.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.3</version>
        </dependency>
*/

    /**
     * Demo entry point: converts one sample Excel 97-2003 workbook to HTML.
     * The other converters are listed as commented-out examples.
     *
     * @param args ignored
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception{
        // Alternative demos, kept for reference:
        //   word2007ToHtml();
        //   DocxToWord03Html("D:/ppt/03a - 副本.doc","D:/ppt/","03a_sd.html");
        //   PoiWord03ToHtml();
        //   Excel07ToHtml("D:/ppt/asd.xlsx","D:/ppt/","123.html");
        final String sourceWorkbook = "D:/ppt/as03d.xls";
        final String outputDirectory = "D:/ppt/";
        final String outputFileName = "123-03.html";
        excel03ToHtml(sourceWorkbook, outputDirectory, outputFileName);
    }

    /**
     * Converts the Word 2007+ document {@code D:/ppt/aa.docx} to the XHTML page
     * {@code D:/ppt/1496717486420.html}.
     * <p>
     * Pictures embedded in the document are extracted below the {@code image}
     * folder next to the page, and the page references them through the relative
     * {@code image} URI. Both the source stream and the target writer are closed
     * when the method returns, whether or not the conversion succeeded.
     *
     * @throws Exception if the document cannot be read or the page cannot be written
     */
    public static void word2007ToHtml() throws Exception {
        String filepath = "D:/ppt/";
        String sourceFileName = filepath + "aa.docx";
        String targetFileName = filepath + "1496717486420.html";
        // Pictures found in the document are copied into this folder.
        String imagePathStr = filepath + "/image/";

        try (InputStream source = new FileInputStream(sourceFileName)) {
            XWPFDocument document = new XWPFDocument(source);

            XHTMLOptions options = XHTMLOptions.create();
            // Folder the extracted pictures are stored in.
            options.setExtractor(new FileImageExtractor(new File(imagePathStr)));
            // Path prefix used for the pictures inside the generated HTML.
            options.URIResolver(new BasicURIResolver("image"));

            // The target is opened only after the document has been loaded, so a
            // broken source file does not leave a truncated page behind.
            try (Writer target = new OutputStreamWriter(
                    new FileOutputStream(targetFileName), "utf-8")) {
                XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
                xhtmlConverter.convert(document, target, options);
            }
        }
    }


    /**
     * Converts a Word 97-2003 ({@code .doc}) file to an HTML page.
     * <p>
     * Every picture of the document is written into {@code htmlpath} under the name
     * {@code htmlname + <picture file name>}, and the generated page references the
     * pictures by that same relative name. A picture whose file cannot be created
     * is reported on stderr and skipped; the conversion continues.
     *
     * @param wordpath
     *            path of the Word file to convert
     * @param htmlpath
     *            directory the HTML page and the pictures are written to
     * @param htmlname
     *            file name of the HTML page
     * @throws Exception
     *             if the document cannot be read, transformed or written
     */
    public static void DocxToWord03Html(String wordpath, String htmlpath,
                                  String htmlname) throws Exception {
        // Prefix shared by the picture files on disk and the URLs inside the page.
        final String picturePrefix = htmlname;

        try (InputStream input = new FileInputStream(wordpath)) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
                            .newDocument());

            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType,
                        String suggestedName, float widthInches, float heightInches) {
                    // Must match the file name used when the pictures are written below,
                    // otherwise the <img> links of the page point to missing files.
                    return picturePrefix + suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(wordDocument);

            List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
            if (pics != null) {
                for (Picture pic : pics) {
                    File pictureFile = new File(htmlpath,
                            picturePrefix + pic.suggestFullFileName());
                    try (OutputStream pictureOut = new FileOutputStream(pictureFile)) {
                        pic.writeImageContent(pictureOut);
                    } catch (FileNotFoundException e) {
                        // Best effort: a picture that cannot be stored must not abort the page.
                        e.printStackTrace();
                    }
                }
            }

            Document htmlDocument = wordToHtmlConverter.getDocument();
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

            // The serializer produced UTF-8 bytes, so they have to be decoded as UTF-8
            // (not with the platform default charset) before being written back as UTF-8.
            String content = outStream.toString("UTF-8");
            FileUtils.writeStringToFile(new File(htmlpath, htmlname), content,
                    "utf-8");
        }
    }
    /**
     * Converts the Word 97-2003 document {@code D:/ppt/03a.doc} to the HTML page
     * {@code D:/ppt/word03.html} (intended for online preview of .doc files).
     * <p>
     * Pictures are stored in {@code D:/ppt/image/}, which is created when missing,
     * and are referenced from the page through the relative URL {@code image/<name>}.
     * A picture whose file cannot be created is reported on stderr and skipped.
     *
     * @throws IOException if the document cannot be read or the page cannot be written
     * @throws ParserConfigurationException if no DOM builder is available
     * @throws TransformerException if the HTML document cannot be serialized
     */
    public static void PoiWord03ToHtml() throws IOException, ParserConfigurationException, TransformerException{
        final String path = "D:/ppt/";
        final String file = "D:/ppt/03a.doc";

        // Without the folder every picture write below would fail. A failed mkdirs()
        // surfaces as the FileNotFoundException handled in the picture loop.
        final File imageDir = new File(path, "image");
        imageDir.mkdirs();

        try (InputStream input = new FileInputStream(file)) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType,
                                          String suggestedName, float widthInches, float heightInches) {
                    // URL of the picture as seen from the page. URLs use '/', never '\'.
                    return "image/" + suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(wordDocument);

            // Store every picture of the document in the image folder.
            List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
            if (pics != null) {
                for (Picture pic : pics) {
                    File pictureFile = new File(imageDir, pic.suggestFullFileName());
                    try (OutputStream pictureOut = new FileOutputStream(pictureFile)) {
                        pic.writeImageContent(pictureOut);
                    } catch (FileNotFoundException e) {
                        // Best effort: a picture that cannot be stored must not abort the page.
                        e.printStackTrace();
                    }
                }
            }

            // Serialize the generated DOM as UTF-8 HTML and write the page.
            Document htmlDocument = wordToHtmlConverter.getDocument();
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
            String content = outStream.toString("UTF-8");
            FileUtils.writeStringToFile(new File(path, "word03.html"), content, "utf-8");
        }
    }


    /**
     * Converts an Excel 2007+ ({@code .xlsx}) workbook to a simple HTML page.
     * <p>
     * Every sheet is rendered as a title line followed by a bordered table: the
     * first row of the sheet becomes the header row ({@code <th>}), the remaining
     * rows become data rows ({@code <td>}). Sheet names and cell text are
     * HTML-escaped. The page is written once, GBK-encoded, after all sheets have
     * been processed; a sheet without rows yields an empty table.
     * <p>
     * Failures are propagated to the caller (as {@code excel03ToHtml} does) instead
     * of being printed and ignored, so a failed conversion is no longer reported
     * as a success.
     *
     * @param filename path of the workbook to read
     * @param htmlpath directory the HTML file is written to
     * @param htmlname name of the HTML file to create
     * @throws Exception if the workbook cannot be read or the page cannot be written
     */
    public static void Excel07ToHtml(String filename, String htmlpath,
                                   String htmlname) throws Exception {
        StringBuilder html = new StringBuilder();
        try (InputStream is = new FileInputStream(filename)) {
            XSSFWorkbook workbook = new XSSFWorkbook(is);
            for (int numSheet = 0; numSheet < workbook.getNumberOfSheets(); numSheet++) {
                Sheet sheet = workbook.getSheetAt(numSheet);
                if (sheet == null) {
                    continue;
                }
                html.append("=======================")
                        .append(escapeExcel07Html(sheet.getSheetName()))
                        .append("=========================<br><br>");
                html.append("<table border='1' align='left'>");

                int firstRowIndex = sheet.getFirstRowNum();
                int lastRowIndex = sheet.getLastRowNum();
                Row firstRow = sheet.getRow(firstRowIndex);
                // A sheet without a first row has nothing to render; the table is
                // still closed below so the markup stays well-formed.
                if (firstRow != null) {
                    html.append("<tr>");
                    appendExcel07Cells(html, firstRow, "th");
                    html.append("</tr>");

                    for (int rowIndex = firstRowIndex + 1; rowIndex <= lastRowIndex; rowIndex++) {
                        Row currentRow = sheet.getRow(rowIndex);
                        html.append("<tr>");
                        if (currentRow != null) {
                            appendExcel07Cells(html, currentRow, "td");
                        } else {
                            html.append(" ");
                        }
                        html.append("</tr>");
                    }
                }
                html.append("</table>");
            }
        }
        FileUtils.writeStringToFile(new File(htmlpath, htmlname), html.toString(),
                "gbk");
    }

    /**
     * Appends one {@code <tag>value</tag>} element per cell of {@code row}.
     * NOTE(review): columns start at the row's own first cell, as before, so rows
     * whose leading cells are missing are not aligned with the other rows.
     */
    private static void appendExcel07Cells(StringBuilder html, Row row, String tag) {
        int firstColumnIndex = row.getFirstCellNum();
        // getLastCellNum() is the index of the last cell PLUS ONE, hence the '<' below.
        int lastColumnIndex = row.getLastCellNum();
        if (firstColumnIndex < 0) {
            // The row contains no cells; getCell(-1) would throw.
            return;
        }
        for (int columnIndex = firstColumnIndex; columnIndex < lastColumnIndex; columnIndex++) {
            String cellValue = getCellValue(row.getCell(columnIndex), true);
            html.append('<').append(tag).append('>')
                    .append(escapeExcel07Html(cellValue))
                    .append("</").append(tag).append('>');
        }
    }

    /** Escapes the characters that have a special meaning in HTML text. */
    private static String escapeExcel07Html(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Returns the content of a cell as text.
     * <p>
     * Boolean and numeric cells are rendered with {@link String#valueOf}, string
     * cells return their text and blank cells return an empty string. Formula
     * cells are rendered from their cached result instead of being read as text,
     * which used to throw for every non-string formula. Anything else (error
     * cells, formulas whose cached result is an error) falls back to the cell's
     * own {@code toString()} rendering.
     * <p>
     * The type codes are the {@code int} constants of {@link Cell} used by POI 3.x
     * (POI 4.0 replaced them with the {@code CellType} enum).
     *
     * @param cell       the cell to read, may be {@code null}
     * @param treatAsStr accepted for backward compatibility; it never influenced
     *                   the result (NOTE(review): presumably meant to read numeric
     *                   cells as text - confirm before giving it a meaning)
     * @return the cell text, or an empty string when {@code cell} is {@code null}
     */
    private static String getCellValue(Cell cell, boolean treatAsStr) {
        if (cell == null) {
            return "";
        }
        int cellType = cell.getCellType();
        if (cellType == Cell.CELL_TYPE_FORMULA) {
            // Render the value the formula evaluated to, not the formula itself.
            cellType = cell.getCachedFormulaResultType();
        }
        switch (cellType) {
            case Cell.CELL_TYPE_BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            case Cell.CELL_TYPE_NUMERIC:
                return String.valueOf(cell.getNumericCellValue());
            case Cell.CELL_TYPE_STRING:
                return String.valueOf(cell.getStringCellValue());
            case Cell.CELL_TYPE_BLANK:
                return "";
            default:
                // Error values cannot be read through getStringCellValue().
                return String.valueOf(cell);
        }
    }


    /**
     * Converts an Excel 97-2003 ({@code .xls}) workbook to an HTML page.
     * <p>
     * The column-letter header row and the row numbers are suppressed; both
     * options are set before the workbook is processed, because they have no
     * effect afterwards. Pictures embedded in the workbook are extracted into
     * {@code htmlpath} as {@code <htmlname>_<n>.<extension>}; a picture whose file
     * cannot be created is reported on stderr and skipped.
     *
     * @param wordpath
     *            path of the Excel file to convert (the parameter name is historical)
     * @param htmlpath
     *            directory the HTML page and the pictures are written to
     * @param htmlname
     *            file name of the HTML page
     * @throws Exception
     *             if the workbook cannot be read, transformed or written
     */
    public static void excel03ToHtml(String wordpath, String htmlpath,
                                   String htmlname) throws Exception {
        try (InputStream input = new FileInputStream(wordpath)) {
            HSSFWorkbook excelBook = new HSSFWorkbook(input);
            ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
                            .newDocument());
            // Drop the column-letter header row.
            excelToHtmlConverter.setOutputColumnHeaders(false);
            // Drop the row numbers.
            excelToHtmlConverter.setOutputRowNumbers(false);
            excelToHtmlConverter.processWorkbook(excelBook);

            // Workbook pictures are HSSFPictureData objects, not HWPF Picture objects.
            List<HSSFPictureData> pics = excelBook.getAllPictures();
            if (pics != null) {
                for (int i = 0; i < pics.size(); i++) {
                    HSSFPictureData pic = pics.get(i);
                    String extension = pic.suggestFileExtension();
                    String pictureName = htmlname + "_" + (i + 1)
                            + (extension == null || extension.isEmpty() ? "" : "." + extension);
                    try (OutputStream pictureOut = new FileOutputStream(
                            new File(htmlpath, pictureName))) {
                        pictureOut.write(pic.getData());
                    } catch (FileNotFoundException e) {
                        // Best effort: a picture that cannot be stored must not abort the page.
                        e.printStackTrace();
                    }
                }
            }

            Document htmlDocument = excelToHtmlConverter.getDocument();
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

            // Decode the UTF-8 bytes as UTF-8, not with the platform default charset.
            String content = outStream.toString("UTF-8");
            FileUtils.writeStringToFile(new File(htmlpath, htmlname), content,
                    "utf-8");
        }
    }


/**
 * Converts a PowerPoint 2007+ ({@code .pptx}) presentation to an HTML page.
 * <p>
 * Every slide is rendered to the PNG file {@code path + imgname + <n> + ".png"}
 * (n starts at 1) and the page {@code htmlname} inside {@code path} shows the
 * images one below the other. Every stream is closed as soon as its file has been
 * written. A presentation without slides yields a page without images.
 * <p>
 * NOTE(review): the {@code <img>} tags reference {@code path + imgname...}, i.e.
 * the same string used to write the files - confirm that this is also a valid
 * URL for whoever views the page.
 *
 * @param path     output directory prefix, expected to end with a separator
 * @param infile   path of the presentation to convert
 * @param htmlname file name of the HTML page
 * @param imgname  file name prefix of the slide images
 * @throws IOException if the presentation cannot be read or a file cannot be written
 */
public static void pptToHtml(String path, String infile, String htmlname,
        String imgname) throws IOException {
    StringBuilder imghtml = new StringBuilder();

    try (InputStream in = new FileInputStream(new File(infile))) {
        XMLSlideShow ppt = new XMLSlideShow(in);
        // All slides share the page size of the presentation.
        Dimension pgsize = ppt.getPageSize();
        List<XSLFSlide> slides = ppt.getSlides();

        for (int i = 0; i < slides.size(); i++) {
            XSLFSlide slide = slides.get(i);

            // Force one font on every text run; the original author added this
            // to avoid garbled text in the rendered images.
            for (XSLFShape shape : slide.getShapes()) {
                if (shape instanceof XSLFTextShape) {
                    for (XSLFTextParagraph paragraph : (XSLFTextShape) shape) {
                        for (XSLFTextRun run : paragraph) {
                            run.setFontFamily("宋体");
                        }
                    }
                }
            }

            BufferedImage img = new BufferedImage(pgsize.width,
                    pgsize.height, BufferedImage.TYPE_INT_RGB);
            Graphics2D graphics = img.createGraphics();
            try {
                // Clear the drawing area, then render the slide onto it.
                graphics.setPaint(Color.white);
                graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width,
                        pgsize.height));
                slide.draw(graphics);
            } finally {
                graphics.dispose();
            }

            String imgs = path + imgname + (i + 1) + ".png";
            imghtml.append("<img src='")
                    .append(imgs)
                    .append("' style='width:80%;vertical-align:text-bottom; ' border='1'><br><br><br><br>");
            try (OutputStream out = new FileOutputStream(imgs)) {
                javax.imageio.ImageIO.write(img, "png", out);
            }
        }
    }

    String ppthtml = "<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>"
            + imghtml + "</body></html>";
    FileUtils.writeStringToFile(new File(path, htmlname), ppthtml,
            "utf-8");

    System.out.println("Image successfully created");
}

/**
 * Converts a PowerPoint 97-2003 ({@code .ppt}) presentation to an HTML page.
 * <p>
 * Every slide is rendered to the PNG file {@code path + imgname + <n> + ".png"}
 * (n starts at 1) and the page {@code htmlname} inside {@code path} shows the
 * images, centered, one below the other. Every stream is closed as soon as its
 * file has been written. A presentation without slides yields a page without images.
 * <p>
 * NOTE(review): the {@code <img>} tags reference {@code path + imgname...}, i.e.
 * the same string used to write the files - confirm that this is also a valid
 * URL for whoever views the page.
 *
 * @param path     output directory prefix, expected to end with a separator
 * @param infile   path of the presentation to convert
 * @param htmlname file name of the HTML page
 * @param imgname  file name prefix of the slide images
 * @throws IOException if the presentation cannot be read or a file cannot be written
 */
public static void pptTohTML(String path, String infile, String htmlname,
        String imgname) throws IOException {
    StringBuilder imghtml = new StringBuilder();

    try (InputStream in = new FileInputStream(new File(infile))) {
        HSLFSlideShow ppt = new HSLFSlideShow(in);
        // All slides share the page size of the presentation.
        Dimension pgsize = ppt.getPageSize();
        List<HSLFSlide> slides = ppt.getSlides();

        for (int i = 0; i < slides.size(); i++) {
            HSLFSlide slide = slides.get(i);

            // Force one font on every text run; the original author added this
            // to avoid garbled text in the rendered images.
            for (HSLFShape shape : slide.getShapes()) {
                if (shape instanceof HSLFTextShape) {
                    for (HSLFTextParagraph paragraph : (HSLFTextShape) shape) {
                        for (HSLFTextRun run : paragraph) {
                            run.setFontFamily("宋体");
                        }
                    }
                }
            }

            BufferedImage img = new BufferedImage(pgsize.width,
                    pgsize.height, BufferedImage.TYPE_INT_RGB);
            Graphics2D graphics = img.createGraphics();
            try {
                // Clear the drawing area, then render the slide onto it.
                graphics.setPaint(Color.white);
                graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width,
                        pgsize.height));
                slide.draw(graphics);
            } finally {
                graphics.dispose();
            }

            String imgs = path + imgname + (i + 1) + ".png";
            imghtml.append("<img src='")
                    .append(imgs)
                    .append("' style='width:80%;vertical-align:text-bottom; ' border='1'><br><br><br><br>");
            try (OutputStream out = new FileOutputStream(imgs)) {
                javax.imageio.ImageIO.write(img, "png", out);
            }
        }
    }

    // The wrapper is a real, closed <div>; centering is a CSS property, not an attribute.
    String ppthtml = "<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>"
            + "<div style='text-align:center'>"
            + imghtml + "</div></body></html>";
    FileUtils.writeStringToFile(new File(path, htmlname), ppthtml,
            "utf-8");

    System.out.println("Image successfully created");
}

/**
 * Converts a GBK-encoded plain text file to a GBK-encoded HTML fragment.
 * <p>
 * Every input line is HTML-escaped and followed by a {@code <br/>} line break.
 * All streams are closed when the method returns, also when reading or writing
 * fails. The method never throws: a missing input file and any failure are
 * reported on the console only, and no output file is created when the input
 * file does not exist.
 *
 * @param filePath     path of the text file to read
 * @param htmlPosition path of the HTML file to create
 */
public static void txtToHtml(String filePath, String htmlPosition) {
    try {
        String encoding = "GBK";
        File file = new File(filePath);
        if (file.isFile() && file.exists()) {
            try (InputStream in = new FileInputStream(file);
                 OutputStream fos = new FileOutputStream(new File(htmlPosition));
                 BufferedReader bufferedReader = new BufferedReader(
                         new InputStreamReader(in, encoding));
                 BufferedWriter bw = new BufferedWriter(
                         new OutputStreamWriter(fos, "gbk"))) {
                String lineTxt;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    bw.write(escapeTextForHtml(lineTxt) + "<br/>");
                }
            }
        } else {
            System.out.println("找不到指定的文件");
        }
    } catch (Exception e) {
        // Deliberately best effort: callers rely on this method not throwing.
        System.out.println("读取文件内容出错");
        e.printStackTrace();
    }
}

/** Escapes the characters that have a special meaning in HTML text content. */
private static String escapeTextForHtml(String text) {
    StringBuilder escaped = new StringBuilder(text.length());
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}


}