使用POI读取大量数据EXCEL文件,并解析成自定义javaBean
程序员文章站
2022-07-13 12:35:07
...
1.几个javaBean的定义
因为javabean比较简单 这里就不详细写了,get set和构造方法自己定义
public class Excel {
private String fileName;
private String suffix;
private String type;
private List<SheetBean> sheets;
//get set
//Constructor
}
public class SheetBean {
private String sheetName;
private List<RowData> data;
private Headers headers;
//get set
//Constructor
}
public class Headers {
//有合并列的时候用到
private Map<String,List<String>> allColumns;
//所有的主要的列,如果有合并列,则代表最下面那一行的列,eg:第一行有合并列,那么columns代表第二行的所有列
private List<String> columns;
}
public class RowData {
private int rowIndex;
private int columns;
private Map<String,Object> data;
private Map<String,CellBean> cells;
}
public class CellBean {
private int colIndex;
private String colName;
private Object value;
private String type;
private int width;
private int colSpan;
private int rowSpan;
}
EXCEL解析
poi jar包版本是3.17的
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.springframework.beans.BeanUtils;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
public class ExcelHelper {
private static final Logger logger = LogManager.getLogger(ExcelParser.class);
/**
* 表格默认处理器
*/
private ISheetContentHandler contentHandler = new DefaultSheetHandler();
/**
* 读取数据
*/
private Excel excel=new Excel();
/**
* 转换表格,默认为转换第一个表格
* @param stream
* @return
* @throws InvalidFormatException
* @throws IOException
* @throws ParseException
*/
public ExcelHelper parse(InputStream stream)throws InvalidFormatException, IOException, ParseException {
return parse(stream, 1);
}
/**
*
* @param stream
* @param sheetId:为要遍历的sheet索引,从1开始
* @return
* @throws InvalidFormatException
* @throws IOException
* @throws ParseException
*/
public synchronized ExcelHelper parse(InputStream stream, int sheetId)throws InvalidFormatException, IOException, ParseException
{
// 每次转换前都清空数据
// datas.clear();
excel=new Excel();
// 打开表格文件输入流
OPCPackage pkg = OPCPackage.open(stream);
try {
// 创建表阅读器
XSSFReader reader;
try {
reader = new XSSFReader(pkg);
} catch (OpenXML4JException e) {
logger.error("读取表格出错");
throw new ParseException(e.fillInStackTrace());
}
// 转换指定单元表
String sheetName="rId"+sheetId;
// sheetName="全国点位资源表";
SheetBean sheetBean=new SheetBean(sheetName);
InputStream shellStream = reader.getSheet(sheetName);
try {
InputSource sheetSource = new InputSource(shellStream);
StylesTable styles = reader.getStylesTable();
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
getContentHandler().init(sheetBean);// 设置读取出的数据
// 获取转换器
XMLReader parser = getSheetParser(styles, strings);
parser.parse(sheetSource);
List<SheetBean> sheets=new ArrayList<SheetBean>();
sheets.add(sheetBean);
excel.setSheets(sheets);
} catch (SAXException e) {
logger.error("读取表格出错");
throw new ParseException(e.fillInStackTrace());
} finally {
shellStream.close();
}
} finally {
pkg.close();
}
return this;
}
/**
* 获取表格读取数据,获取数据前,需要先转换数据<br>
* 此方法不会获取第一行数据
*
* @return 表格读取数据
*/
public Excel getDatas() {
return excel;
}
/**
* 获取表格读取数据,获取数据前,需要先转换数据
*
* @param dropFirstRow
* 删除第一行表头记录
* @return 表格读取数据
*/
// public Excel getDatas(boolean dropFirstRow) {
// if (dropFirstRow && datas.size() > 0) {
// datas.remove(0);// 删除表头
// }
// return excel;
//
// }
/**
* 获取读取表格的转换器
*
* @return 读取表格的转换器
* @throws SAXException
* SAX错误
*/
protected XMLReader getSheetParser(StylesTable styles, ReadOnlySharedStringsTable strings) throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader();
parser.setContentHandler(new XSSFSheetXMLHandler(styles, strings, getContentHandler(), false));
return parser;
}
public ISheetContentHandler getContentHandler() {
return contentHandler;
}
public void setContentHandler(ISheetContentHandler contentHandler) {
this.contentHandler = contentHandler;
}
/**
* 表格转换错误
*/
public class ParseException extends Exception {
private static final long serialVersionUID = -2451526411018517607L;
public ParseException(Throwable t) {
super("表格转换错误", t);
}
}
public interface ISheetContentHandler extends SheetContentsHandler {
/**
* 设置转换后的数据集,用于存放转换结果
*
* @param datas
* 转换结果
*/
void init(SheetBean SheetBean);
}
/**
* 默认表格解析handder
*/
class DefaultSheetHandler implements ISheetContentHandler {
/**
* 读取数据
*/
private SheetBean sheetBean;
List<RowData> sheetData=new ArrayList<RowData>();
// 读取行信息
private RowData readRow=null;
private ArrayList<String> columns = new ArrayList<String>();
@Override
public void init(SheetBean SheetBean) {
this.sheetBean = SheetBean;
// this.columsLength = columsLength;
}
@Override
public void startRow(int rowNum) {
//System.out.print("开始解析第"+rowNum+"行。");
readRow=null;
if (rowNum != 0) {
readRow=new RowData(rowNum);
}
else {
if(sheetBean.getData()==null) {
sheetBean.setData(sheetData);
}
}
}
@Override
public void endRow(int rowNum) {
//System.out.println("第"+rowNum+"行解析结束。");
if (rowNum == 0) {
//表头 列
if(sheetBean.getHeaders()==null) {
Headers headers=new Headers(columns);
sheetBean.setHeaders(headers);
}
}else {
RowData row=new RowData();
BeanUtils.copyProperties(readRow, row);
sheetData.add(row);
}
readRow = null;
}
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
int colIndex = getCellIndex(cellReference);//转换A1,B1,C1等表格位置为真实索引位置
try {
if(readRow!=null) {
int rowIndex=readRow.getRowIndex();
String colName=columns.get(colIndex);
if(colName==null) {
System.out.println(rowIndex);
}
CellBean cellBean=new CellBean(colIndex,colName,formattedValue);
readRow.getCells().put(colName, cellBean);
readRow.getData().put(colName, formattedValue);
}
else {
columns.set(colIndex, formattedValue);
}
} catch (IndexOutOfBoundsException e) {
int size = columns.size();
for (int i = colIndex - size+1;i>0;i--){
columns.add(null);
}
columns.set(colIndex,formattedValue);
logger.info(e.getMessage()+" "+e.getCause());
}
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
}
/**
* 转换表格引用为列编号
*
* @param cellReference
* 列引用
* @return 表格列位置,从0开始算
*/
public int getCellIndex(String cellReference) {
String ref = cellReference.replaceAll("\\d+", "");
int num = 0;
int result = 0;
for (int i = 0; i < ref.length(); i++) {
char ch = cellReference.charAt(ref.length() - i - 1);
num = (int) (ch - 'A' + 1);
num *= Math.pow(26, i);
result += num;
}
return result - 1;
}
}
}
3.调用
public static void test2() {
try {
String date1=CommonUtil.getFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
String filePath = "F:\\20210624XXX.xlsx";
File file = new File(filePath);
// log.info("需要读取的文件fileName:{}", file.getName());
FileInputStream inputStream = new FileInputStream(file);
ExcelHelper excelParser = new ExcelHelper();
ExcelHelper parse = excelParser.parse(inputStream);
Excel excel=parse.getDatas();
//EXCEL文件的内容全在 excel对象里了,可以自己解析利用了
String date2=CommonUtil.getFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
System.out.println("开始时间:"+date1+"\t结束时间:"+date2);
} catch (Exception e) {
e.printStackTrace();
}
}
4.扩展
4.1实战中,可通过parse方法或者init方法传入Map<String,Object> fieldsMap ,将columns直接翻译成需要的字段集合。便于直接利用
4.2 可通过声明一个导入类 继承ExcelHelper 或者实现 ISheetContentHandler,直接进行过滤验证等操作,完成后将过滤好的集合返回用于保存,这个方法会在后面的文章提供实现。
要说的就这些了,希望这篇文章能对大家有用,如果感觉有用的话,请点个赞吧!