POI 转换 HTML(ppt、word、excel、txt,包含 03、07 两个版本)
程序员文章站
2022-04-09 21:42:08
...
package com.zjqy.qbcs.controller;
import com.microsoft.schemas.office.visio.x2012.main.CellType;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hslf.usermodel.HSLFShape;
import org.apache.poi.hslf.usermodel.HSLFSlide;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
import org.apache.poi.hslf.usermodel.HSLFTextRun;
import org.apache.poi.hslf.usermodel.HSLFTextShape;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.FormulaError;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
import org.apache.poi.xslf.usermodel.XSLFTextRun;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.List;
public class Aaa{
/**
<!--===============================================-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
*/
public static void main(String[] args) throws Exception{
    // Demo run: converts one sample Excel 97-2003 workbook. The other converters
    // of this class (word2007ToHtml, DocxToWord03Html, PoiWord03ToHtml,
    // Excel07ToHtml) can be called from here in the same way.
    final String sampleWorkbook = "D:/ppt/as03d.xls";
    final String outputFolder = "D:/ppt/";
    final String outputPage = "123-03.html";
    excel03ToHtml(sampleWorkbook, outputFolder, outputPage);
}
/**
 * Converts a Word 2007+ ({@code .docx}) document to an XHTML file.
 *
 * <p>Source file ({@code D:/ppt/aa.docx}), target file
 * ({@code D:/ppt/1496717486420.html}) and the picture folder are hard-coded.
 * Pictures embedded in the document are extracted through a
 * {@link FileImageExtractor} rooted at the picture folder, and the generated
 * page resolves its image links against the relative base {@code image}.
 *
 * @throws Exception if the document cannot be read, converted or written
 */
public static void word2007ToHtml() throws Exception {
    String filepath = "D:/ppt/";
    String sourceFileName = filepath + "aa.docx";
    String targetFileName = filepath + "1496717486420.html";
    // Folder that receives a copy of the pictures found in the Word document.
    String imagePathStr = filepath + "/image/";
    // try-with-resources: the source stream used to stay open forever.
    try (InputStream source = new FileInputStream(sourceFileName)) {
        XWPFDocument document = new XWPFDocument(source);
        XHTMLOptions options = XHTMLOptions.create();
        // Where extracted pictures are stored on disk.
        options.setExtractor(new FileImageExtractor(new File(imagePathStr)));
        // Base used for the image links inside the generated HTML.
        options.URIResolver(new BasicURIResolver("image"));
        try (OutputStreamWriter outputStreamWriter = new OutputStreamWriter(
                new FileOutputStream(targetFileName), "utf-8")) {
            XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
            xhtmlConverter.convert(document, outputStreamWriter, options);
        }
    }
}
/**
 * Converts a Word 97-2003 ({@code .doc}) document to an HTML file.
 *
 * <p>Every picture of the document is written next to the page as
 * {@code htmlpath + htmlname + <suggested picture name>}, and the page links to
 * the pictures with the matching relative name
 * {@code htmlname + <suggested picture name>}.
 *
 * @param wordpath
 *            path of the source Word file
 * @param htmlpath
 *            output directory; it is concatenated with the picture file names
 *            as-is, so it should end with a path separator
 * @param htmlname
 *            file name of the generated HTML page
 * @throws Exception
 *             if the document cannot be read, converted or written
 */
public static void DocxToWord03Html(String wordpath, String htmlpath,
        String htmlname) throws Exception {
    // Pictures are saved with the page name as prefix (see below); the links
    // emitted into the page must carry the same prefix or they point nowhere.
    final String pictureLinkPrefix = htmlname;
    // try-with-resources: the source stream was never closed before.
    try (InputStream input = new FileInputStream(wordpath)) {
        HWPFDocument wordDocument = new HWPFDocument(input);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                return pictureLinkPrefix + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                String pictureFile = htmlpath + htmlname + pic.suggestFullFileName();
                // Each picture stream is closed as soon as it has been written.
                try (OutputStream pictureOut = new FileOutputStream(pictureFile)) {
                    pic.writeImageContent(pictureOut);
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be stored does not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
        // The bytes are UTF-8; decoding them with the platform charset (as
        // new String(byte[]) does) garbles non-ASCII text on non-UTF-8 systems.
        String content = outStream.toString("UTF-8");
        FileUtils.writeStringToFile(new File(htmlpath, htmlname), content,
                "utf-8");
    }
}
/**
 * Converts a Word 97-2003 ({@code .doc}) document to HTML (online preview of
 * Word 03 files with POI).
 *
 * <p>Input ({@code D:/ppt/03a.doc}) and output ({@code D:/ppt/word03.html}) are
 * hard-coded. Pictures of the document are stored in the {@code image}
 * sub-folder of the output directory, which is created when missing, and the
 * page links to them as {@code image/<suggested picture name>}.
 *
 * @throws IOException if the document cannot be read, the picture folder cannot
 *         be created, or the output cannot be written
 * @throws ParserConfigurationException if no DOM document builder can be created
 * @throws TransformerException if the HTML document cannot be serialized
 */
public static void PoiWord03ToHtml() throws IOException, ParserConfigurationException, TransformerException{
    final String path = "D:/ppt/";
    final String file = "D:/ppt/03a.doc";
    // try-with-resources: the source stream was never closed before.
    try (InputStream input = new FileInputStream(file)) {
        HWPFDocument wordDocument = new HWPFDocument(input);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                // Link used by the HTML page; URLs use '/', not the Windows '\'.
                return "image/" + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        // Without the folder every picture write fails with FileNotFoundException.
        File imageDirectory = new File(path + "image/");
        if (!imageDirectory.isDirectory() && !imageDirectory.mkdirs()) {
            throw new IOException("Cannot create picture folder " + imageDirectory);
        }
        // Store every picture of the document in the picture folder.
        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                String pictureFile = path + "image/" + pic.suggestFullFileName();
                try (OutputStream pictureOut = new FileOutputStream(pictureFile)) {
                    pic.writeImageContent(pictureOut);
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be stored does not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        // Serialize the generated DOM as UTF-8 HTML and write the page.
        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
        String content = outStream.toString("UTF-8");
        FileUtils.writeStringToFile(new File(path, "word03.html"), content, "utf-8");
    }
}
/**
 * Converts an Excel 2007+ ({@code .xlsx}) workbook to a simple HTML page.
 *
 * <p>Each sheet is rendered as a heading line with the sheet name followed by
 * one bordered table: the first row of the sheet becomes the header
 * ({@code <th>}) row, the remaining rows become data rows. All rows of a sheet
 * are rendered over the same column range so that columns stay aligned; missing
 * rows and cells produce empty table cells. Sheets without any row only get the
 * heading line. Sheet names and cell texts are HTML-escaped. The page is written
 * once, GBK-encoded, after all sheets have been rendered.
 *
 * @param filename path of the workbook to read
 * @param htmlpath directory that receives the HTML file
 * @param htmlname file name of the generated HTML page
 * @throws Exception if the workbook cannot be read or the page cannot be
 *         written; failures are reported to the caller instead of only being
 *         printed
 */
public static void Excel07ToHtml(String filename, String htmlpath,
        String htmlname) throws Exception {
    StringBuilder html = new StringBuilder();
    try (InputStream is = new FileInputStream(filename);
            XSSFWorkbook workbook = new XSSFWorkbook(is)) {
        for (int numSheet = 0; numSheet < workbook.getNumberOfSheets(); numSheet++) {
            Sheet sheet = workbook.getSheetAt(numSheet);
            if (sheet == null) {
                continue;
            }
            appendSheetAsTable(html, sheet);
        }
    }
    FileUtils.writeStringToFile(new File(htmlpath, htmlname), html.toString(),
            "gbk");
}

/** Appends the heading line and the HTML table of one sheet to {@code html}. */
private static void appendSheetAsTable(StringBuilder html, Sheet sheet) {
    html.append("=======================").append(escapeHtml(sheet.getSheetName()))
            .append("=========================<br><br>");
    int firstRowIndex = sheet.getFirstRowNum();
    int lastRowIndex = sheet.getLastRowNum();
    Row firstRow = sheet.getRow(firstRowIndex);
    if (firstRow == null) {
        // Empty sheet: emit no table at all rather than an unclosed <table> tag.
        return;
    }
    // Common column range of the sheet. getLastCellNum() is the last cell index
    // PLUS ONE, and both accessors return -1 for a row without cells.
    int firstColumn = Integer.MAX_VALUE;
    int endColumn = 0;
    for (int rowIndex = firstRowIndex; rowIndex <= lastRowIndex; rowIndex++) {
        Row row = sheet.getRow(rowIndex);
        if (row == null || row.getFirstCellNum() < 0) {
            continue;
        }
        firstColumn = Math.min(firstColumn, row.getFirstCellNum());
        endColumn = Math.max(endColumn, row.getLastCellNum());
    }
    html.append("<table border='1' align='left'>");
    appendTableRow(html, firstRow, firstColumn, endColumn, "th");
    for (int rowIndex = firstRowIndex + 1; rowIndex <= lastRowIndex; rowIndex++) {
        appendTableRow(html, sheet.getRow(rowIndex), firstColumn, endColumn, "td");
    }
    html.append("</table>");
}

/** Appends one {@code <tr>} whose cells cover columns {@code [firstColumn, endColumn)}. */
private static void appendTableRow(StringBuilder html, Row row, int firstColumn,
        int endColumn, String cellTag) {
    html.append("<tr>");
    for (int columnIndex = firstColumn; columnIndex < endColumn; columnIndex++) {
        Cell cell = (row == null) ? null : row.getCell(columnIndex);
        html.append('<').append(cellTag).append('>')
                .append(escapeHtml(getCellValue(cell, true)))
                .append("</").append(cellTag).append('>');
    }
    html.append("</tr>");
}

/** Escapes the characters that have a special meaning in HTML text and attributes. */
private static String escapeHtml(String text) {
    if (text == null) {
        return "";
    }
    StringBuilder escaped = new StringBuilder(text.length());
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
/**
 * Returns the content of a cell as text.
 *
 * <p>Cell types are compared against the {@code Cell.CELL_TYPE_*} int constants,
 * i.e. the API of the POI 3.x line this file declares as its dependency (3.14),
 * where {@code getCellType()} returns an {@code int}. Formula cells are rendered
 * from their cached result, blank cells as an empty string and error cells as
 * the Excel error text.
 *
 * @param cell the cell to read; {@code null} yields an empty string
 * @param treatAsStr when {@code true}, numeric values are formatted with the
 *        cell's own number format (the text Excel displays); when
 *        {@code false}, the raw {@code double} is converted with
 *        {@link String#valueOf(double)}
 * @return the textual cell content, never {@code null}
 */
private static String getCellValue(Cell cell, boolean treatAsStr) {
    if (cell == null) {
        return "";
    }
    int cellType = cell.getCellType();
    if (cellType == Cell.CELL_TYPE_FORMULA) {
        // Read the cached result; calling getStringCellValue() on a formula
        // with a non-string result throws IllegalStateException.
        cellType = cell.getCachedFormulaResultType();
    }
    switch (cellType) {
        case Cell.CELL_TYPE_BOOLEAN:
            return String.valueOf(cell.getBooleanCellValue());
        case Cell.CELL_TYPE_NUMERIC:
            double number = cell.getNumericCellValue();
            if (!treatAsStr) {
                return String.valueOf(number);
            }
            // The flag used to be a no-op (a bare cell.getCellType() call).
            // Formatting the raw value works for plain and formula cells alike.
            String pattern = cell.getCellStyle().getDataFormatString();
            if (pattern == null) {
                pattern = "General";
            }
            return new DataFormatter().formatRawCellContents(number,
                    cell.getCellStyle().getDataFormat(), pattern);
        case Cell.CELL_TYPE_BLANK:
            return "";
        case Cell.CELL_TYPE_ERROR:
            return FormulaError.forInt(cell.getErrorCellValue()).getString();
        default:
            return String.valueOf(cell.getStringCellValue());
    }
}
/**
 * Converts an Excel 97-2003 ({@code .xls}) workbook to an HTML file.
 *
 * <p>Column headers and row numbers are switched off before the workbook is
 * processed. Pictures stored in the workbook are written next to the page as
 * {@code htmlpath + htmlname + "_picture" + n [+ "." + extension]}, with
 * {@code n} starting at 1.
 *
 * @param wordpath
 *            path of the source Excel file (the parameter name is historical)
 * @param htmlpath
 *            output directory; it is concatenated with the picture file names
 *            as-is, so it should end with a path separator
 * @param htmlname
 *            file name of the generated HTML page
 * @throws Exception
 *             if the workbook cannot be read, converted or written
 */
public static void excel03ToHtml(String wordpath, String htmlpath,
        String htmlname) throws Exception {
    // try-with-resources: the source stream was never closed before.
    try (InputStream input = new FileInputStream(wordpath)) {
        HSSFWorkbook excelBook = new HSSFWorkbook(input);
        ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        // Must be configured BEFORE processWorkbook(): set afterwards, the
        // options no longer influence the already generated document.
        excelToHtmlConverter.setOutputColumnHeaders(false);
        excelToHtmlConverter.setOutputRowNumbers(false);
        excelToHtmlConverter.processWorkbook(excelBook);

        // getAllPictures() yields HSSFPictureData; casting the elements to the
        // HWPF (Word) Picture class fails with ClassCastException.
        List<HSSFPictureData> pics = excelBook.getAllPictures();
        if (pics != null) {
            for (int i = 0; i < pics.size(); i++) {
                HSSFPictureData pic = pics.get(i);
                String extension = pic.suggestFileExtension();
                String pictureFile = htmlpath + htmlname + "_picture" + (i + 1)
                        + (extension == null || extension.isEmpty() ? "" : "." + extension);
                try (OutputStream pictureOut = new FileOutputStream(pictureFile)) {
                    pictureOut.write(pic.getData());
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be stored does not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        Document htmlDocument = excelToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
        // The bytes are UTF-8; decoding them with the platform charset (as
        // new String(byte[]) does) garbles non-ASCII text on non-UTF-8 systems.
        String content = outStream.toString("UTF-8");
        FileUtils.writeStringToFile(new File(htmlpath, htmlname), content,
                "utf-8");
    }
}
/**
 * Converts a PowerPoint 2007+ ({@code .pptx}) presentation to an HTML page.
 *
 * <p>Every slide is rendered to a PNG file named
 * {@code path + imgname + n + ".png"} ({@code n} starting at 1, plain string
 * concatenation) and the page consists of one {@code <img>} per slide whose
 * {@code src} is that same string. The page itself is written UTF-8 encoded to
 * {@code htmlname} inside {@code path}.
 *
 * @param path output location for the images and the HTML page
 * @param infile path of the presentation to read
 * @param htmlname file name of the generated HTML page
 * @param imgname file name prefix of the slide images
 * @throws IOException if the presentation cannot be read or an output file
 *         cannot be written
 */
public static void pptToHtml(String path, String infile, String htmlname,
        String imgname) throws IOException{
    StringBuilder imghtml = new StringBuilder();
    // try-with-resources: the source stream was never closed before.
    try (InputStream input = new FileInputStream(new File(infile))) {
        XMLSlideShow ppt = new XMLSlideShow(input);
        // Page size of the presentation; every slide image uses it.
        Dimension pgsize = ppt.getPageSize();
        List<XSLFSlide> slides = ppt.getSlides();
        for (int i = 0; i < slides.size(); i++) {
            XSLFSlide slide = slides.get(i);
            // Force a CJK-capable font on every text run to avoid garbled text.
            for (XSLFShape shape : slide.getShapes()) {
                if (shape instanceof XSLFTextShape) {
                    XSLFTextShape tsh = (XSLFTextShape) shape;
                    for (XSLFTextParagraph p : tsh) {
                        for (XSLFTextRun r : p) {
                            r.setFontFamily("宋体");
                        }
                    }
                }
            }
            BufferedImage img = new BufferedImage(pgsize.width,
                    pgsize.height, BufferedImage.TYPE_INT_RGB);
            Graphics2D graphics = img.createGraphics();
            try {
                // Clear the drawing area, then render the slide onto it.
                graphics.setPaint(Color.white);
                graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width,
                        pgsize.height));
                slide.draw(graphics);
            } finally {
                graphics.dispose();
            }
            String imgs = path + imgname + (i + 1) + ".png";
            imghtml.append("<img src=\'")
                    .append(imgs)
                    .append("\' style=\'width:80%;vertical-align:text-bottom; \' border='1'><br><br><br><br>");
            // One stream per slide image, closed right after writing; before,
            // only the last slide's stream was ever closed.
            try (OutputStream out = new FileOutputStream(imgs)) {
                javax.imageio.ImageIO.write(img, "png", out);
            }
        }
    }
    // The former empty-DOM Transformer run wrote into the last PNG stream and
    // failed when the presentation had no slides; the page is plain text.
    String ppthtml = "<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>"
            + imghtml + "</body></html>";
    FileUtils.writeStringToFile(new File(path, htmlname), ppthtml,
            "utf-8");
    System.out.println("Image successfully created");
}
/**
 * Converts a PowerPoint 97-2003 ({@code .ppt}) presentation to an HTML page.
 *
 * <p>Every slide is rendered to a PNG file named
 * {@code path + imgname + n + ".png"} ({@code n} starting at 1, plain string
 * concatenation) and the page consists of one {@code <img>} per slide whose
 * {@code src} is that same string, wrapped in a centered {@code <div>}. The
 * page itself is written UTF-8 encoded to {@code htmlname} inside {@code path}.
 *
 * @param path output location for the images and the HTML page
 * @param infile path of the presentation to read
 * @param htmlname file name of the generated HTML page
 * @param imgname file name prefix of the slide images
 * @throws IOException if the presentation cannot be read or an output file
 *         cannot be written
 */
public static void pptTohTML(String path, String infile, String htmlname,
        String imgname) throws IOException{
    StringBuilder imghtml = new StringBuilder();
    // try-with-resources: the source stream was never closed before.
    try (InputStream input = new FileInputStream(new File(infile))) {
        HSLFSlideShow ppt = new HSLFSlideShow(input);
        // Page size of the presentation; every slide image uses it.
        Dimension pgsize = ppt.getPageSize();
        List<HSLFSlide> slides = ppt.getSlides();
        for (int i = 0; i < slides.size(); i++) {
            HSLFSlide slide = slides.get(i);
            // Force a CJK-capable font on every text run to avoid garbled text.
            for (HSLFShape shape : slide.getShapes()) {
                if (shape instanceof HSLFTextShape) {
                    HSLFTextShape tsh = (HSLFTextShape) shape;
                    for (HSLFTextParagraph p : tsh) {
                        for (HSLFTextRun r : p) {
                            r.setFontFamily("宋体");
                        }
                    }
                }
            }
            BufferedImage img = new BufferedImage(pgsize.width,
                    pgsize.height, BufferedImage.TYPE_INT_RGB);
            Graphics2D graphics = img.createGraphics();
            try {
                // Clear the drawing area, then render the slide onto it.
                graphics.setPaint(Color.white);
                graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width,
                        pgsize.height));
                slide.draw(graphics);
            } finally {
                graphics.dispose();
            }
            String imgs = path + imgname + (i + 1) + ".png";
            imghtml.append("<img src=\'")
                    .append(imgs)
                    .append("\' style=\'width:80%;vertical-align:text-bottom; \' border='1'><br><br><br><br>");
            // One stream per slide image, closed right after writing; before,
            // only the last slide's stream was ever closed.
            try (OutputStream out = new FileOutputStream(imgs)) {
                javax.imageio.ImageIO.write(img, "png", out);
            }
        }
    }
    // The former empty-DOM Transformer run wrote into the last PNG stream and
    // failed when the presentation had no slides; the page is plain text.
    // Markup fixes: text-align is a CSS property, not an attribute, and the
    // wrapper was "closed" with a second opening <div>.
    String ppthtml = "<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>" +
            "<div style='text-align:center'>"
            + imghtml + "</div></body></html>";
    FileUtils.writeStringToFile(new File(path, htmlname), ppthtml,
            "utf-8");
    System.out.println("Image successfully created");
}
/**
 * Converts a GBK-encoded text file to a GBK-encoded HTML fragment.
 *
 * <p>Every input line is copied to the output followed by the literal
 * {@code </br>}; the text itself is written unchanged. When the input is not an
 * existing regular file a message is printed and no output is created. Any
 * failure is reported on the console and is not propagated to the caller.
 *
 * @param filePath path of the text file to read
 * @param htmlPosition path of the HTML file to write
 */
public static void txtToHtml(String filePath, String htmlPosition) {
    try {
        String encoding = "GBK";
        File file = new File(filePath);
        if (file.isFile() && file.exists()) { // only read an existing regular file
            // try-with-resources closes every stream even when reading or
            // writing fails half-way; before, an exception leaked all of them.
            try (InputStream in = new FileInputStream(file);
                    Reader read = new InputStreamReader(in, encoding);
                    BufferedReader bufferedReader = new BufferedReader(read);
                    OutputStream fos = new FileOutputStream(new File(htmlPosition));
                    Writer osw = new OutputStreamWriter(fos, "gbk");
                    BufferedWriter bw = new BufferedWriter(osw)) {
                String lineTxt;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    bw.write(lineTxt + "</br>");
                }
            }
        } else {
            System.out.println("找不到指定的文件");
        }
    } catch (Exception e) {
        System.out.println("读取文件内容出错");
        e.printStackTrace();
    }
}
}