POI实现超大数据的Excel的读写操作,支持Excel最大行数 博客分类: Java
程序员文章站
2024-03-24 14:38:34
...
参考:http://thinkgem.iteye.com/blog/2150940
前端时间写了注解方式Excel的读取和写入,它是根据注解完成Excel的操作,虽说支持大数据,但对于超大数据就无能为力了,因为它的读写期间都是将所有数据放入系统内存的,除非你有超大的内存。
因项目需要对超大数据的Excel读写操作,于是网上找了个超大数据的读写代码,这个不需要太大内存。并对此进行了简单的修改。
原理如下:
Excel超大数据读取:抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析 xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低内存的耗费,特别使用于大数据量的文件。
Excel超大数据写入:抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml, 使用这种方法 写入.xlsx文件,不需要太大的内存。
先看调用示例:
- String file = "E:/导入测试数据.xlsx";
- ExcelReader reader = new ExcelReader() {
- public void getRows(int sheetIndex, int curRow, List<String> rowList) {
- System.out.println("Sheet:" + sheetIndex + ", Row:" + curRow + ", Data:" +rowList);
- }
- };
- reader.process(file, 1);
- String file = "E:/导出测试数据.xlsx";
- ExcelWriter writer = new ExcelWriter() {
- public void generate() throws Exception {
- // 电子表格开始
- this.beginSheet();
- for (int rownum = 0; rownum < 100; rownum++) {
- // 插入新行
- this.insertRow(rownum);
- // 建立新单元格,索引值从0开始,表示第一列
- this.createCell(0, "第 " + rownum + " 行");
- this.createCell(1, 34343.123456789);
- this.createCell(2, "23.67%");
- this.createCell(3, "12:12:23");
- this.createCell(4, "2014-10-11 12:12:23");
- this.createCell(5, "true");
- this.createCell(6, "false");
- // 结束行
- this.endRow();
- }
- // 电子表格结束
- this.endSheet();
- }
- };
- writer.process(file);
- }
这里只展示了对数据的读取和写入,如果正式保存到数据库时建议读取一部分(如100条)再写入一次数据库,尽量不要读取一条就写入一条,这样会非常耗费资源。
源代码如下:
- import java.io.InputStream;
- import java.math.BigDecimal;
- import java.text.SimpleDateFormat;
- import java.util.ArrayList;
- import java.util.Date;
- import java.util.Iterator;
- import java.util.List;
- import org.apache.poi.hssf.usermodel.HSSFDateUtil;
- import org.apache.poi.openxml4j.opc.OPCPackage;
- import org.apache.poi.xssf.eventusermodel.XSSFReader;
- import org.apache.poi.xssf.model.SharedStringsTable;
- import org.apache.poi.xssf.usermodel.XSSFRichTextString;
- import org.xml.sax.Attributes;
- import org.xml.sax.InputSource;
- import org.xml.sax.SAXException;
- import org.xml.sax.XMLReader;
- import org.xml.sax.helpers.DefaultHandler;
- import org.xml.sax.helpers.XMLReaderFactory;
- /**
- * Excel超大数据读取,抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析
- * xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低 内存的耗费,特别使用于大数据量的文件。
- * @version 2014-9-2
- */
- public abstract class ExcelReader extends DefaultHandler {
- // 共享字符串表
- private SharedStringsTable sst;
- // 上一次的内容
- private String lastContents;
- private boolean nextIsString;
- private int sheetIndex = -1;
- private List<String> rowList = new ArrayList<String>();
- // 当前行
- private int curRow = 0;
- // 当前列
- private int curCol = 0;
- // 日期标志
- private boolean dateFlag;
- // 数字标志
- private boolean numberFlag;
- private boolean isTElement;
- /**
- * 遍历工作簿中所有的电子表格
- * @param filename
- * @throws Exception
- */
- public void process(String filename) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- Iterator<InputStream> sheets = r.getSheetsData();
- while (sheets.hasNext()) {
- curRow = 0;
- sheetIndex++;
- InputStream sheet = sheets.next();
- InputSource sheetSource = new InputSource(sheet);
- parser.parse(sheetSource);
- sheet.close();
- }
- }
- /**
- * 只遍历一个电子表格,其中sheetId为要遍历的sheet索引,从1开始,1-3
- * @param filename
- * @param sheetId
- * @throws Exception
- */
- public void process(String filename, int sheetId) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- // 根据 rId# 或 rSheet# 查找sheet
- InputStream sheet2 = r.getSheet("rId" + sheetId);
- sheetIndex++;
- InputSource sheetSource = new InputSource(sheet2);
- parser.parse(sheetSource);
- sheet2.close();
- }
- public XMLReader fetchSheetParser(SharedStringsTable sst)
- throws SAXException {
- XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
- this.sst = sst;
- parser.setContentHandler(this);
- return parser;
- }
- public void startElement(String uri, String localName, String name,
- Attributes attributes) throws SAXException {
- // System.out.println("startElement: " + localName + ", " + name + ", " + attributes);
- // c => 单元格
- if ("c".equals(name)) {
- // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
- String cellType = attributes.getValue("t");
- if ("s".equals(cellType)) {
- nextIsString = true;
- } else {
- nextIsString = false;
- }
- // 日期格式
- String cellDateType = attributes.getValue("s");
- if ("1".equals(cellDateType)) {
- dateFlag = true;
- } else {
- dateFlag = false;
- }
- String cellNumberType = attributes.getValue("s");
- if ("2".equals(cellNumberType)) {
- numberFlag = true;
- } else {
- numberFlag = false;
- }
- }
- // 当元素为t时
- if ("t".equals(name)) {
- isTElement = true;
- } else {
- isTElement = false;
- }
- // 置空
- lastContents = "";
- }
- public void endElement(String uri, String localName, String name)
- throws SAXException {
- // System.out.println("endElement: " + localName + ", " + name);
- // 根据SST的索引值的到单元格的真正要存储的字符串
- // 这时characters()方法可能会被调用多次
- if (nextIsString) {
- try {
- int idx = Integer.parseInt(lastContents);
- lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
- .toString();
- } catch (Exception e) {
- }
- }
- // t元素也包含字符串
- if (isTElement) {
- String value = lastContents.trim();
- rowList.add(curCol, value);
- curCol++;
- isTElement = false;
- // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
- // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
- } else if ("v".equals(name)) {
- String value = lastContents.trim();
- value = value.equals("") ? " " : value;
- try {
- // 日期格式处理
- if (dateFlag) {
- Date date = HSSFDateUtil.getJavaDate(Double.valueOf(value));
- SimpleDateFormat dateFormat = new SimpleDateFormat("dd/MM/yyyy");
- value = dateFormat.format(date);
- }
- // 数字类型处理
- if (numberFlag) {
- BigDecimal bd = new BigDecimal(value);
- value = bd.setScale(3, BigDecimal.ROUND_UP).toString();
- }
- } catch (Exception e) {
- // 转换失败仍用读出来的值
- }
- rowList.add(curCol, value);
- curCol++;
- } else {
- // 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
- if (name.equals("row")) {
- getRows(sheetIndex + 1, curRow, rowList);
- rowList.clear();
- curRow++;
- curCol = 0;
- }
- }
- }
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- // 得到单元格内容的值
- lastContents += new String(ch, start, length);
- }
- /**
- * 获取行数据回调
- * @param sheetIndex
- * @param curRow
- * @param rowList
- */
- public abstract void getRows(int sheetIndex, int curRow, List<String> rowList);
- /**
- * 测试方法
- */
- public static void main(String[] args) throws Exception {
- String file = "E:/导入测试数据.xlsx";
- ExcelReader reader = new ExcelReader() {
- public void getRows(int sheetIndex, int curRow, List<String> rowList) {
- System.out.println("Sheet:" + sheetIndex + ", Row:" + curRow + ", Data:" +rowList);
- }
- };
- reader.process(file, 1);
- }
- }
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.OutputStream;
- import java.io.Writer;
- import java.util.Calendar;
- import java.util.Enumeration;
- import java.util.zip.ZipEntry;
- import java.util.zip.ZipFile;
- import java.util.zip.ZipOutputStream;
- import org.apache.poi.hssf.util.CellReference;
- import org.apache.poi.ss.usermodel.DateUtil;
- import org.apache.poi.xssf.usermodel.XSSFSheet;
- import org.apache.poi.xssf.usermodel.XSSFWorkbook;
- /**
- * Excel超大数据写入,抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml,
- * 使用这种方法 写入.xlsx文件,不需要太大的内存
- * @version 2014-9-2
- */
- public abstract class ExcelWriter {
- private SpreadsheetWriter sw;
- /**
- * 写入电子表格的主要流程
- *
- * @param fileName
- * @throws Exception
- */
- public void process(String fileName) throws Exception {
- // 建立工作簿和电子表格对象
- XSSFWorkbook wb = new XSSFWorkbook();
- XSSFSheet sheet = wb.createSheet("sheet1");
- // 持有电子表格数据的xml文件名 例如 /xl/worksheets/sheet1.xml
- String sheetRef = sheet.getPackagePart().getPartName().getName();
- // 保存模板
- FileOutputStream os = new FileOutputStream("template.xlsx");
- wb.write(os);
- os.close();
- // 生成xml文件
- File tmp = File.createTempFile("sheet", ".xml");
- Writer fw = new FileWriter(tmp);
- sw = new SpreadsheetWriter(fw);
- generate();
- fw.close();
- // 使用产生的数据替换模板
- File templateFile = new File("template.xlsx");
- FileOutputStream out = new FileOutputStream(fileName);
- substitute(templateFile, tmp, sheetRef.substring(1), out);
- out.close();
- // 删除文件之前调用一下垃圾回收器,否则无法删除模板文件
- System.gc();
- // 删除临时模板文件
- if (templateFile.isFile() && templateFile.exists()) {
- templateFile.delete();
- }
- }
- /**
- * 类使用者应该使用此方法进行写操作
- *
- * @throws Exception
- */
- public abstract void generate() throws Exception;
- public void beginSheet() throws IOException {
- sw.beginSheet();
- }
- public void insertRow(int rowNum) throws IOException {
- sw.insertRow(rowNum);
- }
- public void createCell(int columnIndex, String value) throws IOException {
- sw.createCell(columnIndex, value, -1);
- }
- public void createCell(int columnIndex, double value) throws IOException {
- sw.createCell(columnIndex, value, -1);
- }
- public void endRow() throws IOException {
- sw.endRow();
- }
- public void endSheet() throws IOException {
- sw.endSheet();
- }
- /**
- *
- * @param zipfile
- * the template file
- * @param tmpfile
- * the XML file with the sheet data
- * @param entry
- * the name of the sheet entry to substitute, e.g.
- * xl/worksheets/sheet1.xml
- * @param out
- * the stream to write the result to
- */
- private static void substitute(File zipfile, File tmpfile, String entry,
- OutputStream out) throws IOException {
- ZipFile zip = new ZipFile(zipfile);
- ZipOutputStream zos = new ZipOutputStream(out);
- @SuppressWarnings("unchecked")
- Enumeration<ZipEntry> en = (Enumeration<ZipEntry>) zip.entries();
- while (en.hasMoreElements()) {
- ZipEntry ze = en.nextElement();
- if (!ze.getName().equals(entry)) {
- zos.putNextEntry(new ZipEntry(ze.getName()));
- InputStream is = zip.getInputStream(ze);
- copyStream(is, zos);
- is.close();
- }
- }
- zos.putNextEntry(new ZipEntry(entry));
- InputStream is = new FileInputStream(tmpfile);
- copyStream(is, zos);
- is.close();
- zos.close();
- }
- private static void copyStream(InputStream in, OutputStream out)
- throws IOException {
- byte[] chunk = new byte[1024];
- int count;
- while ((count = in.read(chunk)) >= 0) {
- out.write(chunk, 0, count);
- }
- }
- /**
- * 在写入器中写入电子表格
- *
- */
- public static class SpreadsheetWriter {
- private final Writer _out;
- private int _rownum;
- private static String LINE_SEPARATOR = System
- .getProperty("line.separator");
- public SpreadsheetWriter(Writer out) {
- _out = out;
- }
- public void beginSheet() throws IOException {
- _out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
- + "<worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">");
- _out.write("<sheetData>" + LINE_SEPARATOR);
- }
- public void endSheet() throws IOException {
- _out.write("</sheetData>");
- _out.write("</worksheet>");
- }
- /**
- * 插入新行
- *
- * @param rownum
- * 以0开始
- */
- public void insertRow(int rownum) throws IOException {
- _out.write("<row r=\"" + (rownum + 1) + "\">" + LINE_SEPARATOR);
- this._rownum = rownum;
- }
- /**
- * 插入行结束标志
- */
- public void endRow() throws IOException {
- _out.write("</row>" + LINE_SEPARATOR);
- }
- /**
- * 插入新列
- *
- * @param columnIndex
- * @param value
- * @param styleIndex
- * @throws IOException
- */
- public void createCell(int columnIndex, String value, int styleIndex)
- throws IOException {
- String ref = new CellReference(_rownum, columnIndex)
- .formatAsString();
- _out.write("<c r=\"" + ref + "\" t=\"inlineStr\"");
- if (styleIndex != -1)
- _out.write(" s=\"" + styleIndex + "\"");
- _out.write(">");
- _out.write("<is><t>" + encoderXML(value) + "</t></is>");
- _out.write("</c>");
- }
- public void createCell(int columnIndex, String value)
- throws IOException {
- createCell(columnIndex, value, -1);
- }
- public void createCell(int columnIndex, double value, int styleIndex)
- throws IOException {
- String ref = new CellReference(_rownum, columnIndex)
- .formatAsString();
- _out.write("<c r=\"" + ref + "\" t=\"n\"");
- if (styleIndex != -1)
- _out.write(" s=\"" + styleIndex + "\"");
- _out.write(">");
- _out.write("<v>" + value + "</v>");
- _out.write("</c>");
- }
- public void createCell(int columnIndex, double value)
- throws IOException {
- createCell(columnIndex, value, -1);
- }
- public void createCell(int columnIndex, Calendar value, int styleIndex)
- throws IOException {
- createCell(columnIndex, DateUtil.getExcelDate(value, false),
- styleIndex);
- }
- }
- // XML Encode
- private static final String[] xmlCode = new String[256];
- static {
- // Special characters
- xmlCode['\''] = "'";
- xmlCode['\"'] = "\""; // double quote
- xmlCode['&'] = "&"; // ampersand
- xmlCode['<'] = "<"; // lower than
- xmlCode['>'] = ">"; // greater than
- }
- /**
- * <p>
- * Encode the given text into xml.
- * </p>
- *
- * @param string
- * the text to encode
- * @return the encoded string
- */
- public static String encoderXML(String string) {
- if (string == null)
- return "";
- int n = string.length();
- char character;
- String xmlchar;
- StringBuffer buffer = new StringBuffer();
- // loop over all the characters of the String.
- for (int i = 0; i < n; i++) {
- character = string.charAt(i);
- // the xmlcode of these characters are added to a StringBuffer
- // one by one
- try {
- xmlchar = xmlCode[character];
- if (xmlchar == null) {
- buffer.append(character);
- } else {
- buffer.append(xmlCode[character]);
- }
- } catch (ArrayIndexOutOfBoundsException aioobe) {
- buffer.append(character);
- }
- }
- return buffer.toString();
- }
- /**
- * 测试方法
- */
- public static void main(String[] args) throws Exception {
- String file = "E:/导出测试数据.xlsx";
- ExcelWriter writer = new ExcelWriter() {
- public void generate() throws Exception {
- // 电子表格开始
- this.beginSheet();
- for (int rownum = 0; rownum < 100; rownum++) {
- // 插入新行
- this.insertRow(rownum);
- // 建立新单元格,索引值从0开始,表示第一列
- this.createCell(0, "第 " + rownum + " 行");
- this.createCell(1, 34343.123456789);
- this.createCell(2, "23.67%");
- this.createCell(3, "12:12:23");
- this.createCell(4, "2014-10-11 12:12:23");
- this.createCell(5, "true");
- this.createCell(6, "false");
- // 结束行
- this.endRow();
- }
- // 电子表格结束
- this.endSheet();
- }
- };
- writer.process(file);
- }
- }
上一篇: 74. 搜索二维矩阵
下一篇: 深入理解G1垃圾收集器