欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

使用POI读写PowerPoint文件(兼容ppt与pptx版本) java office powerpoint poi

程序员文章站 2022-06-03 15:12:51
...

调用示例:

 

 

// Sample input file; the ".ppt" extension is what the utility uses to pick the parser.
File powerPointFile = new File("D:\\temp.ppt");

// Read all text from the PowerPoint document and print it as a single string.
// "," is passed as the shape separator and ";" as the slide separator.
System.out.println(PowerPointFileUtil.extractTextFromPowerPointFile(powerPointFile , "," , ";"));

 

 

工具类源码:

 

/**
 * BasePowerPointFileUtil.java
 * Copyright ® 2017 窦海宁
 * All rights reserved
 */

package org.aiyu.core.common.util.file.office;

import java.util.ArrayList;
import java.util.List;

import org.apache.poi.sl.usermodel.AutoShape;
import org.apache.poi.sl.usermodel.Shape;
import org.apache.poi.sl.usermodel.Slide;
import org.apache.poi.sl.usermodel.SlideShow;

/**
 * <p>Base class for the PowerPoint file utilities.
 *
 * <p>Walks a slide show slide by slide and shape by shape and collects the shape
 * texts, so that subclasses can extract text from PowerPoint documents.
 *
 * @author  窦海宁, chong0660@sina.com
 * @since   AiyuCommonCore-1.0
 * @version AiyuCommonCore-1.0
 */
public abstract class BasePowerPointFileUtil {

	/**
	 * <p>Reads every slide contained in a slide show.
	 *
	 * @param  slideShow the slide show to read, may be {@code null}
	 *
	 * @return a list with one entry per slide, each entry being the list returned by
	 *         {@link #readSlide(Slide)}; {@code null} when {@code slideShow} is {@code null}
	 *
	 * @modify 窦海宁, 2017-01-18
	 */
	protected static List readSlideShow(SlideShow slideShow) {

		if (slideShow == null) {

			return null;
		}

		List perSlideData = new ArrayList();
		for (Object slide : slideShow.getSlides()) {

			perSlideData.add(BasePowerPointFileUtil.readSlide((Slide) slide));
		}
		return perSlideData;
	}

	/**
	 * <p>Reads every shape of a single slide.
	 *
	 * @param  slide the slide to read, may be {@code null}
	 *
	 * @return a list with one entry per shape, each entry being the value returned by
	 *         {@link #readShape(Shape)} (possibly {@code null});
	 *         {@code null} when {@code slide} is {@code null}
	 *
	 * @modify 窦海宁, 2017-01-18
	 */
	protected static List readSlide(Slide slide) {

		if (slide == null) {

			return null;
		}

		List perShapeData = new ArrayList();
		for (Object shape : slide.getShapes()) {

			perShapeData.add(BasePowerPointFileUtil.readShape((Shape) shape));
		}
		return perShapeData;
	}

	/**
	 * <p>Reads the text of a single shape.
	 *
	 * <p>Only {@link AutoShape} instances are read; any other shape yields {@code null}.
	 *
	 * @param  shape a shape taken from a slide, may be {@code null}
	 *
	 * @return the text of the shape, or {@code null} when the shape is {@code null},
	 *         is not an {@link AutoShape}, or reading its text throws
	 *
	 * @modify 窦海宁, 2017-01-18
	 */
	protected static Object readShape(Shape shape) {

		// instanceof is false for null, so this guard also covers a missing shape.
		if (!(shape instanceof AutoShape)) {

			return null;
		}

		try {

			return ((AutoShape) shape).getText();
		} catch (Exception ex) {

			// Best effort: report the failure and treat the shape as having no text.
			ex.printStackTrace();
			return null;
		}
	}

}
PowerPoint2003版本工具类:
 
/**
 * PowerPoint2003FileUtil.java
 * Copyright ® 2010 窦海宁
 * All rights reserved
 */

package org.aiyu.core.common.util.file.office;

import java.io.File;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl;
import org.apache.poi.sl.usermodel.SlideShow;

/**
 * <p>File utility for PowerPoint 2003 ({@code .ppt}) documents.
 *
 * <p>Extracts the text of a binary PowerPoint document through the HSLF
 * implementation of Apache POI.
 *
 * @author  窦海宁, chong0660@sina.com
 * @since   AiyuCommonCore-1.0
 * @version AiyuCommonCore-1.0
 */
public abstract class PowerPoint2003FileUtil extends BasePowerPointFileUtil {

	/**
	 * <p>Extracts the text of a PowerPoint document.
	 *
	 * <p>Shape texts of one slide are joined with {@code shapeSeparator}; shapes without
	 * text are skipped and never produce a separator. Consecutive slides are joined with
	 * {@code slideSeparator}. If reading fails, the stack trace is printed and whatever
	 * text was collected up to that point is returned.
	 *
	 * @param  powerPointFile the PowerPoint file
	 * @param  shapeSeparator separator placed between the texts of two shapes
	 * @param  slideSeparator separator placed between two slides
	 *
	 * @return the extracted text with surrounding whitespace trimmed, or {@code null}
	 *         when an argument is {@code null}, the file is not a regular file, or the
	 *         trimmed text is empty
	 *
	 * @modify 窦海宁, 2017-01-18
	 */
	protected static String extractTextFromPowerPointFile(File powerPointFile , String shapeSeparator , String slideSeparator) {

		if (powerPointFile == null || shapeSeparator == null || slideSeparator == null || !powerPointFile.isFile()) {

			return null;
		}

		StringBuilder  text      = new StringBuilder();
		HSLFSlideShow  slideShow = null;
		try {

			slideShow = new HSLFSlideShow(new HSLFSlideShowImpl(powerPointFile.getCanonicalPath()));
			Iterator slideIterator = PowerPoint2003FileUtil.readSlideShow(slideShow).iterator();
			// Iterate over the slides
			while (slideIterator.hasNext()) {

				// Tracks whether this slide already contributed a text, so that the
				// separator is only written between two texts and never trails.
				boolean  slideHasText  = false;
				Iterator shapeIterator = ((List) slideIterator.next()).iterator();
				// Iterate over the shapes
				while (shapeIterator.hasNext()) {

					Object shapeValue = shapeIterator.next();
					if (shapeValue == null) {

						continue;
					}
					if (slideHasText) {

						text.append(shapeSeparator);
					}
					text.append((String) shapeValue);
					slideHasText = true;
				}
				if (slideIterator.hasNext()) {

					text.append(slideSeparator);
				}
			}
		} catch (Exception ex) {

			// Best effort: report the failure and return what has been read so far.
			ex.printStackTrace();
		} finally {

			// Release the underlying file so that it is not kept open (and locked).
			if (slideShow != null) {

				try {

					slideShow.close();
				} catch (Exception closeEx) {

					closeEx.printStackTrace();
				}
			}
		}
		return StringUtils.trimToNull(text.toString());
	}

}
 
PowerPoint2007版本工具类:
 
/**
 * PowerPoint2007FileUtil.java
 * Copyright ® 2017 窦海宁
 * All rights reserved
 */

package org.aiyu.core.common.util.file.office;

import java.io.File;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xslf.usermodel.XMLSlideShow;

/**
 * <p>File utility for PowerPoint 2007 ({@code .pptx}) documents.
 *
 * <p>Extracts the text of an OOXML PowerPoint document through the XSLF
 * implementation of Apache POI.
 *
 * @author  窦海宁, chong0660@sina.com
 * @since   AiyuCommonCore-1.0
 * @version AiyuCommonCore-1.0
 */
public abstract class PowerPoint2007FileUtil extends BasePowerPointFileUtil {

	/**
	 * <p>Extracts the text of a PowerPoint document.
	 *
	 * <p>Shape texts of one slide are joined with {@code shapeSeparator}; shapes without
	 * text are skipped and never produce a separator. Consecutive slides are joined with
	 * {@code slideSeparator}. If reading fails, the stack trace is printed and whatever
	 * text was collected up to that point is returned.
	 *
	 * @param  powerPointFile the PowerPoint file
	 * @param  shapeSeparator separator placed between the texts of two shapes
	 * @param  slideSeparator separator placed between two slides
	 *
	 * @return the extracted text with surrounding whitespace trimmed, or {@code null}
	 *         when an argument is {@code null}, the file is not a regular file, or the
	 *         trimmed text is empty
	 *
	 * @modify 窦海宁, 2017-01-18
	 */
	protected static String extractTextFromPowerPointFile(File powerPointFile , String shapeSeparator , String slideSeparator) {

		if (powerPointFile == null || shapeSeparator == null || slideSeparator == null || !powerPointFile.isFile()) {

			return null;
		}

		StringBuilder   text      = new StringBuilder();
		FileInputStream input     = null;
		XMLSlideShow    slideShow = null;
		try {

			input     = new FileInputStream(powerPointFile);
			slideShow = new XMLSlideShow(input);
			Iterator slideIterator = PowerPoint2007FileUtil.readSlideShow(slideShow).iterator();
			// Iterate over the slides
			while (slideIterator.hasNext()) {

				// Tracks whether this slide already contributed a text, so that the
				// separator is only written between two texts and never trails.
				boolean  slideHasText  = false;
				Iterator shapeIterator = ((List) slideIterator.next()).iterator();
				// Iterate over the shapes
				while (shapeIterator.hasNext()) {

					Object shapeValue = shapeIterator.next();
					if (shapeValue == null) {

						continue;
					}
					if (slideHasText) {

						text.append(shapeSeparator);
					}
					text.append((String) shapeValue);
					slideHasText = true;
				}
				if (slideIterator.hasNext()) {

					text.append(slideSeparator);
				}
			}
		} catch (Exception ex) {

			// Best effort: report the failure and return what has been read so far.
			ex.printStackTrace();
		} finally {

			// Close the document first, then the stream it was loaded from; each close
			// is guarded separately so that one failure does not skip the other.
			if (slideShow != null) {

				try {

					slideShow.close();
				} catch (Exception closeEx) {

					closeEx.printStackTrace();
				}
			}
			if (input != null) {

				try {

					input.close();
				} catch (Exception closeEx) {

					closeEx.printStackTrace();
				}
			}
		}
		return StringUtils.trimToNull(text.toString());
	}

}
 
统一调用工具类:
 
/**
 * PowerPointFileUtil.java
 * Copyright ® 2017 窦海宁
 * All rights reserved
 */

package org.aiyu.core.common.util.file.office;

import java.io.File;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;

/**
 * <p>PowerPoint file utility.
 *
 * <p>Single entry point for extracting text from PowerPoint documents; the file
 * extension decides which version-specific utility handles the file.
 *
 * @author  窦海宁, chong0660@sina.com
 * @since   AiyuCommonCore-1.0
 * @version AiyuCommonCore-1.0
 */
public abstract class PowerPointFileUtil extends BasePowerPointFileUtil {

	/**
	 * <p>Extracts the text of a PowerPoint document.
	 *
	 * <p>The file extension is compared case-insensitively: {@code ppt} is handled by
	 * {@link PowerPoint2003FileUtil}, {@code pptx} by {@link PowerPoint2007FileUtil}.
	 * Any other extension is not processed.
	 *
	 * @param  powerPointFile the PowerPoint file
	 * @param  shapeSeparator separator placed between the texts of two shapes
	 * @param  slideSeparator separator placed between two slides
	 *
	 * @return the extracted text, or {@code null} when the file is {@code null}, does
	 *         not exist, has an unsupported extension, or yields no text
	 *
	 * @modify 窦海宁, 2017-02-06
	 */
	public static String extractTextFromPowerPointFile(File powerPointFile , String shapeSeparator , String slideSeparator) {

		String resultText = null;

		if (powerPointFile != null && powerPointFile.exists()) {

			String extension = FilenameUtils.getExtension(powerPointFile.getName());
			if (StringUtils.equalsIgnoreCase("ppt" , extension)) {

				// Office 2003 binary format
				resultText = PowerPoint2003FileUtil.extractTextFromPowerPointFile(powerPointFile , shapeSeparator , slideSeparator);
			} else if (StringUtils.equalsIgnoreCase("pptx" , extension)) {

				// Office 2007 OOXML format: must go to the XSLF-based utility, the
				// binary-format parser cannot read these files.
				resultText = PowerPoint2007FileUtil.extractTextFromPowerPointFile(powerPointFile , shapeSeparator , slideSeparator);
			} else {

				// Unsupported file type: resultText stays null.
			}
		}

		return resultText;
	}

}
 
统一调用工具类通过文件扩展名(PPT与PPTX,不区分大小写)判断文件版本,暂时没有想到更好的办法;本工具类使用POI_3.15实现,无须目标机器安装OFFICE软件也可进行文件读写。