EXCEL POI 合并单元格解析
程序员文章站
2022-07-13 12:42:27
...
EXCEL POI 合并单元格解析
1、EXCEL2007的合并单元格的XML格式
<mergeCells count="2">
<mergeCell ref="A1:A2"/>
<mergeCell ref="B3:B4"/>
</mergeCells>
mergeCells 里面包含了所有合并单元格的情况,需要读取到mergeCell就可以知道 该Sheet的所有单元格的合并情况
2 、SAX解析实现
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* 类名称:CellMergeParser
* 类描述:
*/
public class CellMergeParser {
private XSSFReader xssfReader;
List<String> merges = new ArrayList<>();
InputStream is;
/**
* 构造方法
*/
public CellMergeParser(String filename) throws Exception {
if (StringUtils.isEmpty(filename))
throw new Exception("文件名不能空");
is = new FileInputStream(new File(filename));
OPCPackage pkg = OPCPackage.open(is);
init(pkg);
}
public CellMergeParser(InputStream is) throws Exception {
if (null == is)
throw new Exception("流不能空");
OPCPackage pkg = OPCPackage.open(is);
init(pkg);
}
private void init(OPCPackage pkg) throws IOException, OpenXML4JException {
xssfReader = new XSSFReader(pkg);
}
/**
* 获取sheet
*
* @throws Exception
*/
public List<String> process(int sheetIndex) throws Exception {
SharedStringsTable sst = null;
try {
sst = xssfReader.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> it = xssfReader.getSheetsData();
int index = 0;
while (it.hasNext()) {
index++;
if (index == sheetIndex) {
InputStream sheet = it.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
return merges;
}
}
throw new IllegalArgumentException("sheet index not exist");
} finally {
try {
is.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
/**
* 加载sax 解析器
*
* @param sst
* @return
* @throws SAXException
*/
private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
ContentHandler handler = new PagingHandler(sst);
parser.setContentHandler(handler);
return parser;
}
/**
* See org.xml.sax.helpers.DefaultHandler javadocs
*/
private class PagingHandler extends DefaultHandler {
private SharedStringsTable sst;
private PagingHandler(SharedStringsTable sst) {
this.sst = sst;
}
/**
* 开始元素
*/
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
//获取所有的 cell
if ("mergeCell".equalsIgnoreCase(name)) {
String value = attributes.getValue("ref");
merges.add(value);
}
}
/**
* 获取value
*/
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
}
}
/**
* 转换成 merge bean
*
* @return
*/
public List<MergeBean> convertBean() {
List<MergeBean> mergeBeans = new ArrayList<>();
merges.forEach(v -> {
MergeBean mergeBean = RowColUtil.calcMerge(v);
mergeBeans.add(mergeBean);
});
return mergeBeans;
}
}
通过对EXCEL的格式理解,使用XML的SAX解析方案 解析出所有的CELLMerge元素,然后转化为下标范围
import org.apache.poi.ss.util.CellReference;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A1:A2
*
* @author dean <tanping>
* @version 1.0.0
* @date 2020/01/08 17:10
* @since 1.0.0
*/
public class RowColUtil {
/**
* 正则匹配 mergeBean
* @param group
* @return
*/
public static MergeBean calcMerge(String group){
group = group.toLowerCase();
String[] rw = group.split(":");
Pattern p = Pattern.compile("[a-z]+|\\d+");
Matcher m = p.matcher(rw[0]);
ArrayList<String> start = new ArrayList<>();
while (m.find()) {
start.add(m.group());
}
m = p.matcher(rw[1]);
ArrayList<String> end = new ArrayList<>();
while (m.find()) {
end.add(m.group());
}
int firstCol = CellReference.convertColStringToIndex(start.get(0));
int firstRow = Integer.parseInt(start.get(1));
int endCol = CellReference.convertColStringToIndex(end.get(0));
int endRow = Integer.parseInt(end.get(1));
MergeBean mergeBean = new MergeBean();
mergeBean.setFirstColumn(firstCol);
mergeBean.setFirstRow(firstRow);
mergeBean.setLastColumn(endCol);
mergeBean.setLastRow(endRow);
return mergeBean;
}
}
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
/**
* Merge group组
* [A1:A2, B3:B4]
* row 从1 开始
* @author dean <tanping>
* @version 1.0.0
* @date 2020/01/08 17:06
* @since 1.0.0
*/
@Getter
@Setter
@ToString
public class MergeBean {
int firstRow;
int firstColumn;
int lastRow;
int lastColumn;
/**
* 是否包含
* @param row
* @param column
* @return
*/
public boolean isMergeGroup(int row,int column){
if (row >= firstRow && row <= lastRow) {
if (column >= firstColumn && column <= lastColumn) {
return true;
}
}
return false;
}
/**
* 生成唯一值
* @return
*/
public String createdKey(){
return firstColumn+"-"+firstRow;
}
}
import lombok.Getter;
import lombok.Setter;
import org.apache.poi.ss.util.CellReference;
import javax.ws.rs.GET;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
/**
* 分组缓存
*
* @author dean <tanping>
* @version 1.0.0
* @date 2020/01/08 17:22
* @since 1.0.0
*/
@Getter
@Setter
public class MergeDataCacheManage {
/**
* 默认后缀
*/
public static final String POSTFIX = "_MG";
private List<MergeBean> mergeBeans;
private ConcurrentHashMap<String, String> groups = new ConcurrentHashMap<>();
private ConcurrentHashMap<String, String> values = new ConcurrentHashMap<>();
private int mergeMax = 0;
private String postfix;
/**
* 创建merge cache
*
* @param mergeBeans
*/
public MergeDataCacheManage(List<MergeBean> mergeBeans) {
this.mergeBeans = mergeBeans;
if (mergeBeans != null) {
mergeBeans.forEach(mb -> {
if (mb.getLastColumn() >= mergeMax) {
mergeMax = mb.getLastColumn();
}
});
}
postfix = POSTFIX;
}
/**
* row 从第1开始
* col 从0 开始
* @param data
* @param row
*/
public synchronized HashMap<String, String> caclGroup(HashMap<Integer, String> data, int row) {
if (mergeMax == 0) {
return null;
}
HashMap<String, String> postFixMap = null;
for (int i = 0; i <= mergeMax; i++) {
MergeBean mb = existGroup(row, i);
if (mb != null) {
if (postFixMap == null) {
postFixMap = new HashMap<>();
}
caclPostFix(postFixMap,data,mb,row,i);
}
}
return postFixMap;
}
/**
* 分组 + 赋值
* @param postFixMap
* @param data
* @param mb
* @param row
* @param col
*/
public synchronized void caclPostFix(HashMap<String, String> postFixMap, HashMap<Integer, String> data, MergeBean mb, int row, int col) {
String key = mb.createdKey();
if (groups.get(key) == null) {
groups.put(key, UUID.random());
if (data.get(col)!=null) {
values.put(key, data.get(col));
}
}
data.put(col,values.get(key));
String colName = CellReference.convertNumToColString(col);
postFixMap.put(colName+postfix,groups.get(key));
}
/**
* 是否存在组
*
* @param row
* @param col
* @return
*/
public MergeBean existGroup(int row, int col) {
for (MergeBean mb : mergeBeans) {
if (mb.isMergeGroup(row, col)) {
return mb;
}
}
return null;
}
}
对解析出来的结果进行计算。
测试
import java.util.List;
/**
* TODO
*
* @author dean <tanping>
* @version 1.0.0
* @date 2020/01/08 10:14
* @since 1.0.0
*/
public class SaxTest {
public static void main(String[] args) throws Exception {
String filepath = "C:\\Users\\tanping.GLOBALE\\Desktop\\demo.xlsx";
CellMergeParser reader = new CellMergeParser(filepath);
List<String> result = reader.process(1);
System.out.println(result);
result.forEach(v->{
MergeBean mergeBean = RowColUtil.calcMerge(v);
System.out.println(mergeBean);
});
}
}
上一篇: 机器学习算法在训练过程中保存参数
下一篇: tensorlfow正则化
推荐阅读
-
用Excel定位空值功能批量快速取消已合并的单元格提高数据分析效率
-
定位条件空值实现快速取消合并Excel单元格数据适合处理大数据
-
Excel借助快速访问工具栏自定义快捷键如合并单元格ALT+4
-
Excel对不规则大小不一致的区域进行合并单元格的批量操作
-
Excel借助公式配合不规则合并法实现合并单元格同时合并内容
-
Excel使用空值条件对合并单元格记录进行排序以2007版为例
-
JAVA使用POI(XSSFWORKBOOK)读取EXCEL文件过程解析
-
Excel 合并单元格后合并区域只有第一个单元格有数据
-
C#_Excel数据读取与写入_自定义解析封装类_支持设置标题行位置&使用excel表达式收集数据&单元格映射&标题映射&模板文件的参数数据替换(第二版-增加深度读取和更新功能)
-
取消合并单元格后Excel2007如何快速简单填充空值