欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

EXCEL POI 合并单元格解析

程序员文章站 2022-07-13 12:42:27
...

EXCEL POI 合并单元格解析

1、EXCEL2007的合并单元格的XML格式

<mergeCells count="2">
		<mergeCell ref="A1:A2"/>
		<mergeCell ref="B3:B4"/>
</mergeCells>

mergeCells 里面包含了所有合并单元格的情况,需要读取到mergeCell就可以知道 该Sheet的所有单元格的合并情况

2 、SAX解析实现

import org.apache.commons.lang3.StringUtils;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;


/**
 * 类名称:CellMergeParser
 * 类描述:
 */
public class CellMergeParser {
    private XSSFReader xssfReader;

    List<String> merges = new ArrayList<>();
    InputStream is;

    /**
     * 构造方法
     */
    public CellMergeParser(String filename) throws Exception {
        if (StringUtils.isEmpty(filename))
            throw new Exception("文件名不能空");
        is = new FileInputStream(new File(filename));
        OPCPackage pkg = OPCPackage.open(is);
        init(pkg);

    }

    public CellMergeParser(InputStream is) throws Exception {
        if (null == is)
            throw new Exception("流不能空");
        OPCPackage pkg = OPCPackage.open(is);
        init(pkg);
    }

    private void init(OPCPackage pkg) throws IOException, OpenXML4JException {
        xssfReader = new XSSFReader(pkg);
    }

    /**
     * 获取sheet
     *
     * @throws Exception
     */
    public List<String> process(int sheetIndex) throws Exception {
        SharedStringsTable sst = null;
        try {
            sst = xssfReader.getSharedStringsTable();
            XMLReader parser = fetchSheetParser(sst);
            Iterator<InputStream> it = xssfReader.getSheetsData();
            int index = 0;
            while (it.hasNext()) {
                index++;
                if (index == sheetIndex) {
                    InputStream sheet = it.next();
                    InputSource sheetSource = new InputSource(sheet);
                    parser.parse(sheetSource);
                    sheet.close();


                    return merges;
                }
            }
            throw new IllegalArgumentException("sheet index not exist");
        } finally {
            try {
                is.close();
            } catch (Exception e) {
                e.printStackTrace();
            }

        }

    }

    /**
     * 加载sax 解析器
     *
     * @param sst
     * @return
     * @throws SAXException
     */
    private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new PagingHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    /**
     * See org.xml.sax.helpers.DefaultHandler javadocs
     */
    private class PagingHandler extends DefaultHandler {
        private SharedStringsTable sst;

        private PagingHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        /**
         * 开始元素
         */
        @Override
        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
            //获取所有的 cell
            if ("mergeCell".equalsIgnoreCase(name)) {
                String value = attributes.getValue("ref");
                merges.add(value);
            }
        }

        /**
         * 获取value
         */
        @Override
        public void endElement(String uri, String localName, String name) throws SAXException {
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
        }

    }

    /**
     * 转换成 merge bean
     *
     * @return
     */
    public List<MergeBean> convertBean() {
        List<MergeBean> mergeBeans = new ArrayList<>();

        merges.forEach(v -> {
            MergeBean mergeBean = RowColUtil.calcMerge(v);
            mergeBeans.add(mergeBean);

        });

        return mergeBeans;
    }

}

通过对EXCEL的格式理解,使用XML的SAX解析方案 解析出所有的CELLMerge元素,然后转化为下标范围

import org.apache.poi.ss.util.CellReference;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A1:A2
 *
 * @author dean <tanping>
 * @version 1.0.0
 * @date 2020/01/08 17:10
 * @since 1.0.0
 */
public class RowColUtil {
    /**
     * 正则匹配 mergeBean
     * @param group
     * @return
     */
    public  static  MergeBean calcMerge(String group){
        group = group.toLowerCase();
        String[] rw = group.split(":");

        Pattern p = Pattern.compile("[a-z]+|\\d+");
        Matcher m = p.matcher(rw[0]);
        ArrayList<String> start = new ArrayList<>();
        while (m.find()) {
            start.add(m.group());
        }

        m = p.matcher(rw[1]);
        ArrayList<String> end = new ArrayList<>();
        while (m.find()) {
            end.add(m.group());
        }

        int firstCol = CellReference.convertColStringToIndex(start.get(0));
        int firstRow = Integer.parseInt(start.get(1));

        int endCol = CellReference.convertColStringToIndex(end.get(0));
        int endRow = Integer.parseInt(end.get(1));

        MergeBean mergeBean = new MergeBean();
        mergeBean.setFirstColumn(firstCol);
        mergeBean.setFirstRow(firstRow);
        mergeBean.setLastColumn(endCol);
        mergeBean.setLastRow(endRow);

        return mergeBean;
    }
}

import lombok.Getter;
import lombok.Setter;
import lombok.ToString;

/**
 * Merge group组
 * [A1:A2, B3:B4]
 *  row 从1 开始
 * @author dean <tanping>
 * @version 1.0.0
 * @date 2020/01/08 17:06
 * @since 1.0.0
 */
@Getter
@Setter
@ToString
public class MergeBean {
    int firstRow;
    int firstColumn;
    int lastRow;
    int lastColumn;

    /**
     * 是否包含
     * @param row
     * @param column
     * @return
     */
    public boolean isMergeGroup(int row,int column){
        if (row >= firstRow && row <= lastRow) {
            if (column >= firstColumn && column <= lastColumn) {
                return true;
            }
        }
        return false;
    }

    /**
     * 生成唯一值
     * @return
     */
    public String createdKey(){
        return firstColumn+"-"+firstRow;
    }
}

import lombok.Getter;
import lombok.Setter;
import org.apache.poi.ss.util.CellReference;

import javax.ws.rs.GET;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;

/**
 * 分组缓存
 *
 * @author dean <tanping>
 * @version 1.0.0
 * @date 2020/01/08 17:22
 * @since 1.0.0
 */
@Getter
@Setter
public class MergeDataCacheManage {

    /**
     * 默认后缀
     */
    public static final String POSTFIX = "_MG";

    private List<MergeBean> mergeBeans;
    private ConcurrentHashMap<String, String> groups = new ConcurrentHashMap<>();
    private ConcurrentHashMap<String, String> values = new ConcurrentHashMap<>();

    private int mergeMax = 0;
    private String postfix;

    /**
     * 创建merge cache
     *
     * @param mergeBeans
     */
    public MergeDataCacheManage(List<MergeBean> mergeBeans) {

        this.mergeBeans = mergeBeans;
        if (mergeBeans != null) {
            mergeBeans.forEach(mb -> {
                if (mb.getLastColumn() >= mergeMax) {
                    mergeMax = mb.getLastColumn();
                }
            });
        }
        postfix = POSTFIX;
    }


    /**
     * row 从第1开始
     * col 从0 开始
     * @param data
     * @param row
     */
    public synchronized  HashMap<String, String> caclGroup(HashMap<Integer, String> data, int row) {
        if (mergeMax == 0) {
            return null;
        }
        HashMap<String, String> postFixMap = null;
        for (int i = 0; i <= mergeMax; i++) {
            MergeBean mb = existGroup(row, i);
            if (mb != null) {
                if (postFixMap == null) {
                    postFixMap = new HashMap<>();
                }
                caclPostFix(postFixMap,data,mb,row,i);
            }
        }
        return postFixMap;
    }


    /**
     * 分组 + 赋值
     * @param postFixMap
     * @param data
     * @param mb
     * @param row
     * @param col
     */
    public synchronized void caclPostFix(HashMap<String, String> postFixMap, HashMap<Integer, String> data, MergeBean mb, int row, int col) {
        String key = mb.createdKey();
        if (groups.get(key) == null) {
            groups.put(key, UUID.random());
            if (data.get(col)!=null) {
                values.put(key, data.get(col));
            }
        }
        data.put(col,values.get(key));

        String colName = CellReference.convertNumToColString(col);
        postFixMap.put(colName+postfix,groups.get(key));
    }


    /**
     * 是否存在组
     *
     * @param row
     * @param col
     * @return
     */
    public MergeBean existGroup(int row, int col) {

        for (MergeBean mb : mergeBeans) {
            if (mb.isMergeGroup(row, col)) {
                return mb;
            }
        }
        return null;
    }

}

对解析出来的结果进行计算。

测试

import java.util.List;

/**
 * TODO
 *
 * @author dean <tanping>
 * @version 1.0.0
 * @date 2020/01/08 10:14
 * @since 1.0.0
 */
public class SaxTest {

    public static void main(String[] args) throws Exception {
        String filepath = "C:\\Users\\tanping.GLOBALE\\Desktop\\demo.xlsx";
        CellMergeParser reader = new CellMergeParser(filepath);
        List<String> result = reader.process(1);

        System.out.println(result);

        result.forEach(v->{
            MergeBean mergeBean = RowColUtil.calcMerge(v);
            System.out.println(mergeBean);
        });
    }
}

相关标签: xmll java