欢迎您访问程序员文章站，本站旨在为大家提供、分享程序员计算机编程知识！
您现在的位置是: 首页

poi转换HTML（ppt、word、excel、txt，包含03、07两个版本）

程序员文章站 2022-04-09 21:42:08
...
package com.zjqy.qbcs.controller;

import com.microsoft.schemas.office.visio.x2012.main.CellType;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hslf.usermodel.HSLFShape;
import org.apache.poi.hslf.usermodel.HSLFSlide;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
import org.apache.poi.hslf.usermodel.HSLFTextRun;
import org.apache.poi.hslf.usermodel.HSLFTextShape;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
import org.apache.poi.xslf.usermodel.XSLFTextRun;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.List;

public class Aaa{
/**

 Maven dependencies required by the converters in this class:
 <!--===============================================-->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>xdocreport</artifactId>
            <version>1.0.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.3</version>
        </dependency>
*/

    /**
     * Demo entry point: converts one sample Excel 97-2003 workbook to HTML.
     * The other converters of this class are listed below for reference.
     *
     * @param args ignored
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        // Alternative invocations, disabled:
        //   word2007ToHtml();
        //   DocxToWord03Html("D:/ppt/03a - 副本.doc", "D:/ppt/", "03a_sd.html");
        //   PoiWord03ToHtml();
        //   Excel07ToHtml("D:/ppt/asd.xlsx", "D:/ppt/", "123.html");
        final String sourceWorkbook = "D:/ppt/as03d.xls";
        final String outputDirectory = "D:/ppt/";
        final String outputFileName = "123-03.html";
        excel03ToHtml(sourceWorkbook, outputDirectory, outputFileName);
    }

    /**
     * Converts the Word 2007+ sample document {@code D:/ppt/aa.docx} to
     * {@code D:/ppt/1496717486420.html} (UTF-8).
     *
     * <p>Embedded pictures are extracted below {@code D:/ppt/image/} and are
     * referenced from the HTML through the relative URI prefix {@code image}.
     * Both the source stream and the output writer are closed even when the
     * conversion fails.
     *
     * @throws Exception if the document cannot be read or the HTML cannot be written
     */
    public static void word2007ToHtml() throws Exception {
        String filepath = "D:/ppt/";
        String sourceFileName = filepath + "aa.docx";
        String targetFileName = filepath + "1496717486420.html";
        // Pictures of the document are copied into this folder.
        String imagePathStr = filepath + "/image/";

        InputStream sourceStream = new FileInputStream(sourceFileName);
        try {
            XWPFDocument document = new XWPFDocument(sourceStream);
            XHTMLOptions options = XHTMLOptions.create();
            // Folder the pictures are extracted to.
            options.setExtractor(new FileImageExtractor(new File(imagePathStr)));
            // URI prefix used for the pictures inside the generated HTML.
            options.URIResolver(new BasicURIResolver("image"));

            OutputStreamWriter outputStreamWriter =
                    new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");
            try {
                XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
                xhtmlConverter.convert(document, outputStreamWriter, options);
            } finally {
                outputStreamWriter.close();
            }
        } finally {
            // The original never closed the source stream.
            sourceStream.close();
        }
    }


    /**
     * Converts a Word 97-2003 ({@code .doc}) file to a UTF-8 HTML file.
     *
     * <p>Every picture of the document is written next to the HTML file as
     * {@code htmlname + <suggested picture name>}; the {@code <img>} URLs in
     * the generated HTML use that same name so the pictures resolve.
     * A picture whose file cannot be opened is reported on stderr and skipped.
     *
     * @param wordpath path of the source {@code .doc} file
     * @param htmlpath directory the HTML file and the pictures are written to
     * @param htmlname file name of the HTML file, also used as picture-name prefix
     * @throws Exception if the document cannot be read, converted or written
     */
    public static void DocxToWord03Html(String wordpath, String htmlpath,
                                  String htmlname) throws Exception {
        // Final copy so the anonymous PicturesManager below can capture it on any Java version.
        final String picturePrefix = htmlname;

        HWPFDocument wordDocument;
        InputStream input = new FileInputStream(wordpath);
        try {
            wordDocument = new HWPFDocument(input);
        } finally {
            // The original leaked this stream.
            input.close();
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                                      String suggestedName, float widthInches, float heightInches) {
                // Must match the file name used when the pictures are written below.
                return picturePrefix + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                File pictureFile = new File(htmlpath, picturePrefix + pic.suggestFullFileName());
                try {
                    OutputStream pictureOut = new FileOutputStream(pictureFile);
                    try {
                        pic.writeImageContent(pictureOut);
                    } finally {
                        pictureOut.close();
                    }
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be stored must not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

        // Decode with the charset the serializer wrote, not the platform default.
        String content = outStream.toString("UTF-8");
        FileUtils.writeStringToFile(new File(htmlpath, htmlname), content,
                "utf-8");
    }
    /**
     * Converts the Word 97-2003 sample document {@code D:/ppt/03a.doc} to
     * {@code D:/ppt/word03.html} (UTF-8), e.g. for an online preview.
     *
     * <p>Pictures are written to {@code D:/ppt/image/} (created if missing) and
     * referenced from the HTML as {@code image/<suggested picture name>}.
     * A picture whose file cannot be opened is reported on stderr and skipped.
     *
     * @throws IOException                  if the document cannot be read or the output cannot be written
     * @throws ParserConfigurationException if no DOM document builder is available
     * @throws TransformerException         if the HTML cannot be serialized
     */
    public static void PoiWord03ToHtml() throws IOException, ParserConfigurationException, TransformerException{
        final String path = "D:/ppt/";
        final String file = "D:/ppt/03a.doc";

        HWPFDocument wordDocument;
        InputStream input = new FileInputStream(file);
        try {
            wordDocument = new HWPFDocument(input);
        } finally {
            // The original leaked this stream.
            input.close();
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                                      String suggestedName, float widthInches, float heightInches) {
                // URL of the picture inside the HTML page; '/' instead of the
                // original '\\' so the link is a valid relative URL.
                return "image/" + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        // Store every picture of the document in the image folder.
        File imageDir = new File(path, "image");
        // Return value deliberately ignored: if the folder cannot be created,
        // each picture fails below with FileNotFoundException and is skipped.
        imageDir.mkdirs();
        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                try {
                    OutputStream pictureOut =
                            new FileOutputStream(new File(imageDir, pic.suggestFullFileName()));
                    try {
                        pic.writeImageContent(pictureOut);
                    } finally {
                        pictureOut.close();
                    }
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be stored must not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        // Serialize the converted DOM and write it as the HTML page.
        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

        String content = outStream.toString("UTF-8");
        FileUtils.writeStringToFile(new File(path, "word03.html"), content, "utf-8");
    }


    /**
     * Converts an Excel 2007+ ({@code .xlsx}) workbook to a simple HTML file
     * encoded as GBK.
     *
     * <p>For every sheet a title line with the sheet name is written, followed
     * by one {@code <table>}: the first row of the sheet becomes the
     * {@code <th>} header row, all following rows become {@code <td>} rows.
     * A sheet without rows contributes only its title line. Sheet names and
     * cell texts are HTML-escaped. The file is written once, after all sheets
     * have been processed.
     *
     * @param filename path of the source {@code .xlsx} file
     * @param htmlpath directory the HTML file is written to
     * @param htmlname file name of the HTML file
     * @throws Exception if the workbook cannot be read or the HTML cannot be written
     */
    public static void Excel07ToHtml(String filename, String htmlpath,
                                   String htmlname) throws Exception {
        StringBuilder html = new StringBuilder();
        InputStream is = new FileInputStream(filename);
        try {
            XSSFWorkbook workbook = new XSSFWorkbook(is);
            for (int numSheet = 0; numSheet < workbook.getNumberOfSheets(); numSheet++) {
                Sheet sheet = workbook.getSheetAt(numSheet);
                if (sheet == null) {
                    continue;
                }
                html.append("=======================")
                        .append(escapeExcel07Html(sheet.getSheetName()))
                        .append("=========================<br><br>");

                int firstRowIndex = sheet.getFirstRowNum();
                int lastRowIndex = sheet.getLastRowNum();
                Row firstRow = sheet.getRow(firstRowIndex);
                if (firstRow == null) {
                    // Empty sheet: skip before opening a table that would never be closed.
                    continue;
                }

                html.append("<table border='1' align='left'>");
                // Header row.
                html.append("<tr>");
                appendExcel07RowCells(html, firstRow, "th");
                html.append("</tr>");

                // Data rows.
                for (int rowIndex = firstRowIndex + 1; rowIndex <= lastRowIndex; rowIndex++) {
                    Row currentRow = sheet.getRow(rowIndex);
                    html.append("<tr>");
                    if (currentRow != null) {
                        appendExcel07RowCells(html, currentRow, "td");
                    } else {
                        html.append(" ");
                    }
                    html.append("</tr>");
                }
                html.append("</table>");
            }
        } finally {
            // The original leaked this stream.
            is.close();
        }
        FileUtils.writeStringToFile(new File(htmlpath, htmlname), html.toString(),
                "gbk");
    }

    /**
     * Appends one HTML cell element ({@code tag} is {@code th} or {@code td})
     * for every column between the first and the last defined cell of a row.
     */
    private static void appendExcel07RowCells(StringBuilder html, Row row, String tag) {
        // getFirstCellNum() is -1 for a row without cells; getLastCellNum()
        // is the index of the last cell PLUS ONE, hence the exclusive bound.
        int firstColumn = Math.max(row.getFirstCellNum(), 0);
        int endColumn = row.getLastCellNum();
        for (int column = firstColumn; column < endColumn; column++) {
            String value = getCellValue(row.getCell(column), true);
            html.append('<').append(tag).append('>')
                    .append(escapeExcel07Html(value))
                    .append("</").append(tag).append('>');
        }
    }

    /** Escapes the characters that are significant in HTML text content. */
    private static String escapeExcel07Html(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            switch (ch) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(ch);
            }
        }
        return escaped.toString();
    }

    /**
     * Returns the content of a cell as text.
     *
     * <p>Written against the POI 3.x API, where {@code getCellType()} returns
     * one of the {@code int} constants {@code Cell.CELL_TYPE_*}. Formula cells
     * are rendered from their cached result instead of failing in
     * {@code getStringCellValue()}; error cells are rendered as their Excel
     * error text.
     *
     * @param cell       the cell to read, may be {@code null}
     * @param treatAsStr retained for interface compatibility; it had no effect
     *                   in the original implementation and is still ignored
     * @return the cell text; an empty string for a {@code null} or blank cell
     */
    private static String getCellValue(Cell cell, boolean treatAsStr) {
        if (cell == null) {
            return "";
        }
        int cellType = cell.getCellType();
        if (cellType == Cell.CELL_TYPE_FORMULA) {
            // Use the type of the value Excel cached for the formula.
            cellType = cell.getCachedFormulaResultType();
        }
        switch (cellType) {
            case Cell.CELL_TYPE_BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            case Cell.CELL_TYPE_NUMERIC:
                return String.valueOf(cell.getNumericCellValue());
            case Cell.CELL_TYPE_BLANK:
                return "";
            case Cell.CELL_TYPE_ERROR:
                return org.apache.poi.ss.usermodel.FormulaError
                        .forInt(cell.getErrorCellValue()).getString();
            default:
                return String.valueOf(cell.getStringCellValue());
        }
    }


    /**
     * Converts an Excel 97-2003 ({@code .xls}) workbook to a UTF-8 HTML file.
     *
     * <p>Column headers and row numbers are switched off; this is configured
     * before the workbook is processed. Pictures stored in the workbook are
     * exported next to the HTML file as {@code htmlname_<n>.<extension>};
     * a picture whose file cannot be opened is reported on stderr and skipped.
     *
     * @param wordpath path of the source {@code .xls} file (the parameter name
     *                 is historical)
     * @param htmlpath directory the HTML file and the pictures are written to
     * @param htmlname file name of the HTML file
     * @throws Exception if the workbook cannot be read, converted or written
     */
    public static void excel03ToHtml(String wordpath, String htmlpath,
                                   String htmlname) throws Exception {
        HSSFWorkbook excelBook;
        InputStream input = new FileInputStream(wordpath);
        try {
            excelBook = new HSSFWorkbook(input);
        } finally {
            // The original leaked this stream.
            input.close();
        }

        ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        // Must be set BEFORE processWorkbook(); the original set them afterwards.
        // Drop the column header row (A, B, C, ...).
        excelToHtmlConverter.setOutputColumnHeaders(false);
        // Drop the row number column.
        excelToHtmlConverter.setOutputRowNumbers(false);
        excelToHtmlConverter.processWorkbook(excelBook);

        // HSSFWorkbook yields HSSFPictureData, not the Word Picture the original cast to.
        List<HSSFPictureData> pics = excelBook.getAllPictures();
        if (pics != null) {
            for (int i = 0; i < pics.size(); i++) {
                HSSFPictureData pic = pics.get(i);
                String extension = pic.suggestFileExtension();
                String pictureName = htmlname + "_" + (i + 1)
                        + (extension == null || extension.length() == 0 ? "" : "." + extension);
                try {
                    OutputStream pictureOut = new FileOutputStream(new File(htmlpath, pictureName));
                    try {
                        pictureOut.write(pic.getData());
                    } finally {
                        pictureOut.close();
                    }
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be stored must not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        Document htmlDocument = excelToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

        // Decode with the charset the serializer wrote, not the platform default.
        String content = outStream.toString("UTF-8");
        FileUtils.writeStringToFile(new File(htmlpath, htmlname), content,
                "utf-8");
    }


/**
 * Converts a PowerPoint 2007+ ({@code .pptx}) presentation to an HTML page
 * that shows every slide as a PNG image.
 *
 * <p>Slide {@code n} (1-based) is rendered to {@code path/imgname<n>.png};
 * the UTF-8 HTML page {@code path/htmlname} references the images by their
 * file name, i.e. relative to the page itself. A presentation without slides
 * yields a page with an empty body.
 *
 * @param path     directory the images and the HTML page are written to
 * @param infile   path of the source {@code .pptx} file
 * @param htmlname file name of the HTML page
 * @param imgname  file name prefix of the slide images
 * @throws IOException if the presentation cannot be read or an output file cannot be written
 */
public static void pptToHtml(String path, String infile, String htmlname,
        String imgname) throws IOException {

    XMLSlideShow ppt;
    InputStream presentationIn = new FileInputStream(new File(infile));
    try {
        ppt = new XMLSlideShow(presentationIn);
    } finally {
        // The original leaked this stream.
        presentationIn.close();
    }

    // Slide dimensions, used as the size of every rendered image.
    Dimension pgsize = ppt.getPageSize();
    List<XSLFSlide> slides = ppt.getSlides();
    StringBuilder imghtml = new StringBuilder();

    for (int i = 0; i < slides.size(); i++) {
        XSLFSlide slide = slides.get(i);

        // Force a CJK-capable font so Chinese text is not rendered as garbage.
        for (XSLFShape shape : slide.getShapes()) {
            if (shape instanceof XSLFTextShape) {
                for (XSLFTextParagraph paragraph : (XSLFTextShape) shape) {
                    for (XSLFTextRun run : paragraph) {
                        run.setFontFamily("宋体");
                    }
                }
            }
        }

        BufferedImage img = new BufferedImage(pgsize.width,
                pgsize.height, BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = img.createGraphics();
        try {
            // Clear the drawing area, then render the slide.
            graphics.setPaint(Color.white);
            graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width,
                    pgsize.height));
            slide.draw(graphics);
        } finally {
            graphics.dispose();
        }

        // One stream per slide, closed right after use (the original closed only the last one).
        String imageFileName = imgname + (i + 1) + ".png";
        OutputStream imageOut = new FileOutputStream(new File(path, imageFileName));
        try {
            javax.imageio.ImageIO.write(img, "png", imageOut);
        } finally {
            imageOut.close();
        }

        imghtml.append("<img src='")
                .append(imageFileName)
                .append("' style='width:80%;vertical-align:text-bottom; ' border='1'><br><br><br><br>");
    }

    String ppthtml = "<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>"
            + imghtml + "</body></html>";
    FileUtils.writeStringToFile(new File(path, htmlname), ppthtml,
            "utf-8");

    System.out.println("Image successfully created");
}

/**
 * Converts a PowerPoint 97-2003 ({@code .ppt}) presentation to an HTML page
 * that shows every slide as a PNG image.
 *
 * <p>Slide {@code n} (1-based) is rendered to {@code path/imgname<n>.png};
 * the UTF-8 HTML page {@code path/htmlname} references the images by their
 * file name, i.e. relative to the page itself. A presentation without slides
 * yields a page with an empty body.
 *
 * @param path     directory the images and the HTML page are written to
 * @param infile   path of the source {@code .ppt} file
 * @param htmlname file name of the HTML page
 * @param imgname  file name prefix of the slide images
 * @throws IOException if the presentation cannot be read or an output file cannot be written
 */
public static void pptTohTML(String path, String infile, String htmlname,
        String imgname) throws IOException {

    HSLFSlideShow ppt;
    InputStream presentationIn = new FileInputStream(new File(infile));
    try {
        ppt = new HSLFSlideShow(presentationIn);
    } finally {
        // The original leaked this stream.
        presentationIn.close();
    }

    // Slide dimensions, used as the size of every rendered image.
    Dimension pgsize = ppt.getPageSize();
    List<HSLFSlide> slides = ppt.getSlides();
    StringBuilder imghtml = new StringBuilder();

    for (int i = 0; i < slides.size(); i++) {
        HSLFSlide slide = slides.get(i);

        // Force a CJK-capable font so Chinese text is not rendered as garbage.
        for (HSLFShape shape : slide.getShapes()) {
            if (shape instanceof HSLFTextShape) {
                for (HSLFTextParagraph paragraph : (HSLFTextShape) shape) {
                    for (HSLFTextRun run : paragraph) {
                        run.setFontFamily("宋体");
                    }
                }
            }
        }

        BufferedImage img = new BufferedImage(pgsize.width,
                pgsize.height, BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = img.createGraphics();
        try {
            // Clear the drawing area, then render the slide.
            graphics.setPaint(Color.white);
            graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width,
                    pgsize.height));
            slide.draw(graphics);
        } finally {
            graphics.dispose();
        }

        // One stream per slide, closed right after use (the original closed only the last one).
        String imageFileName = imgname + (i + 1) + ".png";
        OutputStream imageOut = new FileOutputStream(new File(path, imageFileName));
        try {
            javax.imageio.ImageIO.write(img, "png", imageOut);
        } finally {
            imageOut.close();
        }

        imghtml.append("<img src='")
                .append(imageFileName)
                .append("' style='width:80%;vertical-align:text-bottom; ' border='1'><br><br><br><br>");
    }

    // The original emitted an invalid attribute (text-align='center') and
    // never closed the <div>; both are corrected here.
    String ppthtml = "<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>" +
            "<div style='text-align:center'>"
            + imghtml + "</div></body></html>";
    FileUtils.writeStringToFile(new File(path, htmlname), ppthtml,
            "utf-8");

    System.out.println("Image successfully created");
}

/**
 * Converts a GBK-encoded plain-text file to an HTML fragment, also encoded
 * as GBK: every input line is HTML-escaped and followed by {@code <br>}.
 *
 * <p>This method never throws. A missing source file is reported on stdout;
 * any other failure is reported on stdout together with a stack trace.
 * Both streams are closed even when reading or writing fails.
 *
 * @param filePath     path of the source text file
 * @param htmlPosition path of the HTML file to create
 */
public static void txtToHtml(String filePath, String htmlPosition) {
    try {
        String encoding = "GBK";
        File file = new File(filePath);
        if (!file.isFile()) { // isFile() is false for a missing file as well
            System.out.println("找不到指定的文件");
            return;
        }

        InputStream in = new FileInputStream(file);
        try {
            OutputStream out = new FileOutputStream(new File(htmlPosition));
            try {
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding));
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, encoding));
                String lineTxt;
                while ((lineTxt = reader.readLine()) != null) {
                    writer.write(escapeTxtLineForHtml(lineTxt));
                    // "<br>" replaces the original, invalid "</br>".
                    writer.write("<br>");
                }
                // Push buffered characters out before the stream is closed.
                writer.flush();
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    } catch (Exception e) {
        // Kept broad on purpose: callers rely on this method not throwing.
        System.out.println("读取文件内容出错");
        e.printStackTrace();
    }
}

/** Escapes the characters of one text line that are significant in HTML. */
private static String escapeTxtLineForHtml(String line) {
    StringBuilder escaped = new StringBuilder(line.length());
    for (int i = 0; i < line.length(); i++) {
        char ch = line.charAt(i);
        switch (ch) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            default:
                escaped.append(ch);
        }
    }
    return escaped.toString();
}


}

 

相关标签: poi poi转换HTML