欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

使用POI读取大量数据EXCEL文件,并解析成自定义javaBean

程序员文章站 2022-07-13 12:35:07
...

1.几个javaBean的定义

因为javabean比较简单 这里就不详细写了,get set和构造方法自己定义

public class Excel {

	private String fileName;
	
	private String suffix;
	
	private String type;
	
	private List<SheetBean> sheets;
	//get set
	//Constructor
}

public class SheetBean {

	private String sheetName;
	
	private List<RowData> data;
	
	private Headers headers;
	//get set
	//Constructor
}

public class Headers {

	//有合并列的时候用到
	private Map<String,List<String>> allColumns;
	
	//所有的主要的列,如果有合并列,则代表最下面那一行的列,eg:第一行有合并列,那么columns代表第二行的所有列
	private List<String> columns;
}

public class RowData {

	private int rowIndex;
	
	private int columns;
	
	private Map<String,Object> data;
	
	private Map<String,CellBean> cells;
}

public class CellBean {

	private int colIndex;
	
	private String colName;
	
	private Object value;
	
	private String type;
	
	private int width;
	
	private int colSpan;
	
	private int rowSpan;
}

EXCEL解析

poi jar包版本是3.17的

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.springframework.beans.BeanUtils;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

public class ExcelHelper {



	private static final Logger logger = LogManager.getLogger(ExcelParser.class);
	/**
	 * 表格默认处理器
	 */
	private ISheetContentHandler contentHandler = new DefaultSheetHandler();
	/**
	 * 读取数据
	 */
	private Excel excel=new Excel();

	/**
	 * 转换表格,默认为转换第一个表格
	 * @param stream
	 * @return
	 * @throws InvalidFormatException
	 * @throws IOException
	 * @throws ParseException
	 */
	public ExcelHelper parse(InputStream stream)throws InvalidFormatException, IOException, ParseException {
		return parse(stream, 1);
	}


	/**
	 *
	 * @param stream
	 * @param sheetId:为要遍历的sheet索引,从1开始
	 * @return
	 * @throws InvalidFormatException
	 * @throws IOException
	 * @throws ParseException
	 */
	public synchronized ExcelHelper parse(InputStream stream, int sheetId)throws InvalidFormatException, IOException, ParseException 
	{
		// 每次转换前都清空数据
//		datas.clear();
		excel=new Excel();
		// 打开表格文件输入流
		OPCPackage pkg = OPCPackage.open(stream);
		try {
			// 创建表阅读器
			XSSFReader reader;
			try {
				reader = new XSSFReader(pkg);
			} catch (OpenXML4JException e) {
				logger.error("读取表格出错");
				throw new ParseException(e.fillInStackTrace());
			}

			// 转换指定单元表
			String sheetName="rId"+sheetId;
//			sheetName="全国点位资源表";
			SheetBean sheetBean=new SheetBean(sheetName);
			InputStream shellStream = reader.getSheet(sheetName);
			try {
				InputSource sheetSource = new InputSource(shellStream);
				StylesTable styles = reader.getStylesTable();
				ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
				getContentHandler().init(sheetBean);// 设置读取出的数据
				// 获取转换器
				XMLReader parser = getSheetParser(styles, strings);
				parser.parse(sheetSource);
				List<SheetBean> sheets=new ArrayList<SheetBean>();
				sheets.add(sheetBean);
				excel.setSheets(sheets);
			} catch (SAXException e) {
				logger.error("读取表格出错");
				throw new ParseException(e.fillInStackTrace());
			} finally {
				shellStream.close();
			}
		} finally {
			pkg.close();

		}
		return this;

	}

	/**
	 * 获取表格读取数据,获取数据前,需要先转换数据<br>
	 * 此方法不会获取第一行数据
	 *
	 * @return 表格读取数据
	 */
	public Excel getDatas() {
		return excel;

	}

	/**
	 * 获取表格读取数据,获取数据前,需要先转换数据
	 *
	 * @param dropFirstRow
	 *    删除第一行表头记录
	 * @return 表格读取数据
	 */
//	public Excel getDatas(boolean dropFirstRow) {
//		if (dropFirstRow && datas.size() > 0) {
//			datas.remove(0);// 删除表头
//		}
//		return excel;
//
//	}

	/**
	 * 获取读取表格的转换器
	 *
	 * @return 读取表格的转换器
	 * @throws SAXException
	 * SAX错误
	 */
	protected XMLReader getSheetParser(StylesTable styles, ReadOnlySharedStringsTable strings) throws SAXException {
		XMLReader parser = XMLReaderFactory.createXMLReader();
		parser.setContentHandler(new XSSFSheetXMLHandler(styles, strings, getContentHandler(), false));
		return parser;
	}

	public ISheetContentHandler getContentHandler() {
		return contentHandler;
	}

	public void setContentHandler(ISheetContentHandler contentHandler) {
		this.contentHandler = contentHandler;
	}

	/**
	 * 表格转换错误
	 */
	public class ParseException extends Exception {
		private static final long serialVersionUID = -2451526411018517607L;

		public ParseException(Throwable t) {
			super("表格转换错误", t);
		}

	}

	public interface ISheetContentHandler extends SheetContentsHandler {
		/**
  		* 设置转换后的数据集,用于存放转换结果
  		*
  		* @param datas
  		*    转换结果
  		*/
		void init(SheetBean SheetBean);
	}

	/**
	 * 默认表格解析handder
	 */
	class DefaultSheetHandler implements ISheetContentHandler {
		/**
  		* 读取数据
  		*/
		private SheetBean sheetBean;
		List<RowData> sheetData=new ArrayList<RowData>();
		// 读取行信息
		private RowData readRow=null;
		
		private ArrayList<String> columns = new ArrayList<String>();
		
		@Override
		public void init(SheetBean SheetBean) {
			this.sheetBean = SheetBean;
			//  this.columsLength = columsLength;
		}

		@Override
		public void startRow(int rowNum) {
			//System.out.print("开始解析第"+rowNum+"行。");
			readRow=null;
			if (rowNum != 0) {
				readRow=new RowData(rowNum);
			}
			else {
				if(sheetBean.getData()==null) {
					sheetBean.setData(sheetData);
				}
			}
		}

		@Override
		public void endRow(int rowNum) {
			//System.out.println("第"+rowNum+"行解析结束。");
			if (rowNum == 0) {
				//表头 列
				if(sheetBean.getHeaders()==null) {
					Headers headers=new Headers(columns);
					sheetBean.setHeaders(headers);
				}
			}else {
				RowData row=new RowData();
				BeanUtils.copyProperties(readRow, row);
				sheetData.add(row);
			}
			readRow = null;
		}

		@Override
		public void cell(String cellReference, String formattedValue, XSSFComment comment) {
			int colIndex = getCellIndex(cellReference);//转换A1,B1,C1等表格位置为真实索引位置
			try {
				if(readRow!=null) {
					int rowIndex=readRow.getRowIndex();
					String colName=columns.get(colIndex);
					if(colName==null) {
						System.out.println(rowIndex);
					}
					CellBean cellBean=new CellBean(colIndex,colName,formattedValue);
					readRow.getCells().put(colName, cellBean);
					readRow.getData().put(colName, formattedValue);
				}
				else {
					columns.set(colIndex, formattedValue);
				}
			} catch (IndexOutOfBoundsException e) {
				int size = columns.size();
				for (int i = colIndex - size+1;i>0;i--){
					columns.add(null);
				}
				columns.set(colIndex,formattedValue);
				logger.info(e.getMessage()+" "+e.getCause());
			}
		}

		@Override
		public void headerFooter(String text, boolean isHeader, String tagName) {
			

		}

		/**
  		* 转换表格引用为列编号
  		*
  		* @param cellReference
  		*    列引用
  		* @return 表格列位置,从0开始算
  		*/
		public int getCellIndex(String cellReference) {
			String ref = cellReference.replaceAll("\\d+", "");
			int num = 0;
			int result = 0;
			for (int i = 0; i < ref.length(); i++) {
				char ch = cellReference.charAt(ref.length() - i - 1);
				num = (int) (ch - 'A' + 1);
				num *= Math.pow(26, i);
				result += num;
			}
			return result - 1;
		}
	}
}

3.调用

public static void test2() {
		try {
			String date1=CommonUtil.getFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
			String filePath = "F:\\20210624XXX.xlsx";
			File file = new File(filePath);
			//	log.info("需要读取的文件fileName:{}", file.getName());
			FileInputStream inputStream = new FileInputStream(file);
			ExcelHelper excelParser = new ExcelHelper();
			ExcelHelper parse = excelParser.parse(inputStream);
			Excel excel=parse.getDatas();
			//EXCEL文件的内容全在 excel对象里了,可以自己解析利用了
			String date2=CommonUtil.getFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
			System.out.println("开始时间:"+date1+"\t结束时间:"+date2);
		} catch (Exception e) {
			e.printStackTrace();
		}
}

4.扩展

4.1实战中,可通过parse方法或者init方法传入Map<String,Object> fieldsMap ,将columns直接翻译成需要的字段集合。便于直接利用
4.2 可通过声明一个导入类 继承ExcelHelper 或者实现 ISheetContentHandler,直接进行过滤验证等操作,完成后将过滤好的集合返回用于保存,这个方法会在后面的文章提供实现。

要说的就这些了,希望这篇文章能对大家有用,如果感觉有用的话,请点个赞吧!

相关标签: java excel 后端