使用DocumentFormat.OpenXml操作Excel文件.xlsx
程序员文章站
2023-10-31 18:02:40
1.开始 DocumentFormat.OpenXml是ms官方给一个操作office三大件新版文件格式(.xlsx,.docx,.pptx)的组件;特色是它定义了OpenXml所包含的所有对象(たぶん),能做到精确微调文件内容格式;因此它没有EppPlus那么容易上手,性能也很看使用者的水平。。 ......
1.开始
documentformat.openxml是ms官方给一个操作office三大件新版文件格式(.xlsx,.docx,.pptx)的组件;特色是它定义了openxml所包含的所有对象(たぶん),能做到精确微调文件内容格式;因此它没有eppplus那么容易上手,性能也很看使用者的水平。。
documentformat.openxml的语法很接近直接操作xml,所以使用它来操作excel,得先熟悉excel的xml文档结构:
↑已经忘记从哪里找来的了; workbookpart包含4个重要子节点:
- worksheetpart:表格数据内容就在这里面,结构最复杂的部分,workheet的子节点除了colmns、sheetdata还有合并单元格集合mergecells(图中缺失);
- worksheet:存放表单id及命名(sheet1, sheet2...),这里有excel的坑,如果包含多个sheet直接sheets.first()有可能获取到最后一张sheet,最好根据name来搜索;
- workbootstylepart:存放样式;
- sharedstringtablepart(上图中缺失):共享字符串集合,字符串默认会存在里面,然后cell引用其数组下标,这也是类似保存1w行"一二三亖"的.xlsx比.txt小的原因
了解了这些想用documentformat.openxml从零开始new一个样式还看得过去的excel文档依然很麻烦(相信用eppplus、npoi也一样),而且openxml的同级节点还强调先后顺序,错了用excel打开就会报错,相信没人想这么做;正确的姿势应该是加载模板写入数据另存为,模板中把要用到的样式都定义好,然后用代码拷贝对应单元格的样式。
先定义一些扩展方法,不然用起来会很累:
1 using system; 2 using system.collections.generic; 3 using system.linq; 4 using system.xml; 5 using documentformat.openxml; 6 using documentformat.openxml.packaging; 7 using documentformat.openxml.spreadsheet; 8 9 namespace eopenxml 10 { 11 public static class openxmlexcelextentions 12 { 13 public static sheet getsheet(this workbookpart workbookpart, string sheetname) 14 { 15 return workbookpart.workbook 16 .getfirstchild<sheets>() 17 .elements<sheet>().where(s => s.name == sheetname).firstordefault(); 18 } 19 20 /// <summary> 21 /// given a worksheet and a row index, return the row. 22 /// </summary> 23 /// <param name="sheetdata"></param> 24 /// <param name="rowindex"></param> 25 /// <returns></returns> 26 public static row getrow(this sheetdata sheetdata, uint rowindex) 27 { 28 return sheetdata. 29 elements<row>().where(r => r.rowindex == rowindex).firstordefault(); 30 } 31 public static cell getcell(this sheetdata sheetdata, string columnname, uint rowindex) 32 { 33 row row = getrow(sheetdata, rowindex); 34 35 if (row == null) 36 return null; 37 38 return row.elements<cell>().where(c => string.compare 39 (c.cellreference.value, columnname + 40 rowindex, true) == 0).firstordefault(); 41 } 42 43 // https://msdn.microsoft.com/en-us/library/office/cc861607.aspx 44 // given a column name, a row index, and a worksheetpart, inserts a cell into the worksheet. 45 // if the cell already exists, returns it. 46 public static cell getorcreatecell(this sheetdata sheetdata, string columnname, uint rowindex) 47 { 48 string cellreference = columnname + rowindex; 49 50 // if the worksheet does not contain a row with the specified row index, insert one. 51 row row; 52 if (sheetdata.elements<row>().where(r => r.rowindex == rowindex).count() != 0) 53 { 54 row = sheetdata.elements<row>().where(r => r.rowindex == rowindex).first(); 55 } 56 else 57 { 58 row = new row() { rowindex = rowindex }; 59 sheetdata.append(row); 60 } 61 62 return row.getorcreatecell(cellreference); 63 } 64 public static cell getorcreatecell(this row row, string cellreference) 65 { 66 // if there is not a cell with the specified column name, insert one. 67 if (row.elements<cell>().where(c => c?.cellreference?.value == cellreference).count() > 0) 68 { 69 return row.elements<cell>().where(c => c.cellreference.value == cellreference).first(); 70 } 71 else 72 { 73 // cells must be in sequential order according to cellreference. determine where to insert the new cell. 74 cell refcell = null; 75 foreach (cell cell in row.elements<cell>()) 76 { 77 if (cell.cellreference.value.length == cellreference.length) 78 { 79 if (string.compare(cell.cellreference.value, cellreference, true) > 0) 80 { 81 refcell = cell; 82 break; 83 } 84 } 85 } 86 87 cell newcell = new cell() { cellreference = cellreference }; 88 row.insertbefore(newcell, refcell); 89 return newcell; 90 } 91 } 92 93 public static string getvalue(this cell cell, sharedstringtablepart sharestringpart) 94 { 95 if (cell == null) 96 return null; 97 string cellvalue = cell.innertext; 98 if (cell.datatype != null) 99 { 100 if (cell.datatype == cellvalues.sharedstring) 101 { 102 int id = -1; 103 if (int32.tryparse(cellvalue, out id)) 104 { 105 sharedstringitem item = getitem(sharestringpart, id); 106 if (item.text != null) 107 { 108 //code to take the string value 109 cellvalue = item.text.text; 110 } 111 else if (item.innertext != null) 112 { 113 cellvalue = item.innertext; 114 } 115 else if (item.innerxml != null) 116 { 117 cellvalue = item.innerxml; 118 } 119 } 120 } 121 } 122 return cellvalue; 123 } 124 public static string getvalue(this cell cell, string[] sharestringpartvalues) 125 { 126 if (cell == null) 127 return null; 128 string cellvalue = cell.innertext; 129 if (cell.datatype != null) 130 { 131 if (cell.datatype == cellvalues.sharedstring) 132 { 133 int id = -1; 134 if (int32.tryparse(cellvalue, out id)) 135 { 136 cellvalue = sharestringpartvalues[id]; 137 } 138 } 139 } 140 return cellvalue; 141 } 142 143 public static cell setvalue(this cell cell, object value = null, sharedstringtablepart sharestringpart = null, int sharestringitemindex = -1, uint styleindex = 0) 144 { 145 if (value == null) 146 { 147 cell.cellvalue = new cellvalue(); 148 if (sharestringitemindex != -1) 149 { 150 cell.cellvalue = new cellvalue(sharestringitemindex.tostring()); 151 cell.datatype = new enumvalue<cellvalues>(cellvalues.sharedstring); 152 } 153 } 154 else if (value is string str) 155 { 156 if (sharestringpart == null) 157 { 158 cell.cellvalue = new cellvalue(str); 159 cell.datatype = new enumvalue<cellvalues>(cellvalues.string); 160 } 161 else 162 { 163 // insert the text into the sharedstringtablepart. 164 int index = sharestringpart.getorinsertitem(str, false); 165 // set the value of cell 166 cell.cellvalue = new cellvalue(index.tostring()); 167 cell.datatype = new enumvalue<cellvalues>(cellvalues.sharedstring); 168 } 169 } 170 else if (value is int || value is short || value is long || 171 value is float || value is double || value is uint || 172 value is ulong || value is ushort || value is decimal) 173 { 174 cell.cellvalue = new cellvalue(value.tostring()); 175 cell.datatype = new enumvalue<cellvalues>(cellvalues.number); 176 } 177 else if (value is datetime date) 178 { 179 cell.cellvalue = new cellvalue(date.tostring("yyyy-mm-dd")); // iso 861 180 cell.datatype = new enumvalue<cellvalues>(cellvalues.date); 181 } 182 else if (value is xmldocument xd) 183 { 184 if (sharestringpart == null) 185 { 186 throw new exception("param [sharestringpart] can't be null when value type is xmldocument."); 187 } 188 else 189 { 190 int index = sharestringpart.getorinsertitem(xd.outerxml, true); 191 // set the value of cell 192 cell.cellvalue = new cellvalue(index.tostring()); 193 cell.datatype = new enumvalue<cellvalues>(cellvalues.sharedstring); 194 } 195 } 196 197 if (styleindex != 0) 198 cell.styleindex = styleindex; 199 200 return cell; 201 } 202 203 // https://msdn.microsoft.com/en-us/library/office/gg278314.aspx 204 // given text and a sharedstringtablepart, creates a sharedstringitem with the specified text 205 // and inserts it into the sharedstringtablepart. if the item already exists, returns its index. 206 public static int getorinsertitem(this sharedstringtablepart sharestringpart, string content, bool isxml) 207 { 208 // if the part does not contain a sharedstringtable, create one. 209 if (sharestringpart.sharedstringtable == null) 210 { 211 sharestringpart.sharedstringtable = new sharedstringtable(); 212 } 213 214 int i = 0; 215 216 // iterate through all the items in the sharedstringtable. if the text already exists, return its index. 217 foreach (sharedstringitem item in sharestringpart.sharedstringtable.elements<sharedstringitem>()) 218 { 219 if ((!isxml && item.innertext == content) || (isxml && item.outerxml == content)) 220 { 221 return i; 222 } 223 224 i++; 225 } 226 227 // the text does not exist in the part. create the sharedstringitem and return its index. 228 if (isxml) 229 sharestringpart.sharedstringtable.appendchild(new sharedstringitem(content)); 230 else 231 sharestringpart.sharedstringtable.appendchild(new sharedstringitem(new text(content))); 232 sharestringpart.sharedstringtable.save(); 233 234 return i; 235 } 236 private static sharedstringitem getitem(this sharedstringtablepart sharestringpart, int id) 237 { 238 return sharestringpart.sharedstringtable.elements<sharedstringitem>().elementat(id); 239 } 240 241 /// <summary> 242 /// https://docs.microsoft.com/en-us/office/open-xml/how-to-merge-two-adjacent-cells-in-a-spreadsheet 243 /// </summary> 244 /// <param name="worksheet"></param> 245 /// <returns></returns> 246 public static mergecells getorcreatemergecells(this worksheet worksheet) 247 { 248 mergecells mergecells; 249 if (worksheet.elements<mergecells>().count() > 0) 250 { 251 mergecells = worksheet.elements<mergecells>().first(); 252 } 253 else 254 { 255 mergecells = new mergecells(); 256 257 // insert a mergecells object into the specified position. 258 if (worksheet.elements<customsheetview>().count() > 0) 259 { 260 worksheet.insertafter(mergecells, worksheet.elements<customsheetview>().first()); 261 } 262 else if (worksheet.elements<dataconsolidate>().count() > 0) 263 { 264 worksheet.insertafter(mergecells, worksheet.elements<dataconsolidate>().first()); 265 } 266 else if (worksheet.elements<sortstate>().count() > 0) 267 { 268 worksheet.insertafter(mergecells, worksheet.elements<sortstate>().first()); 269 } 270 else if (worksheet.elements<autofilter>().count() > 0) 271 { 272 worksheet.insertafter(mergecells, worksheet.elements<autofilter>().first()); 273 } 274 else if (worksheet.elements<scenarios>().count() > 0) 275 { 276 worksheet.insertafter(mergecells, worksheet.elements<scenarios>().first()); 277 } 278 else if (worksheet.elements<protectedranges>().count() > 0) 279 { 280 worksheet.insertafter(mergecells, worksheet.elements<protectedranges>().first()); 281 } 282 else if (worksheet.elements<sheetprotection>().count() > 0) 283 { 284 worksheet.insertafter(mergecells, worksheet.elements<sheetprotection>().first()); 285 } 286 else if (worksheet.elements<sheetcalculationproperties>().count() > 0) 287 { 288 worksheet.insertafter(mergecells, worksheet.elements<sheetcalculationproperties>().first()); 289 } 290 else 291 { 292 worksheet.insertafter(mergecells, worksheet.elements<sheetdata>().first()); 293 } 294 worksheet.save(); 295 } 296 return mergecells; 297 } 298 299 /// <summary> 300 /// given the names of two adjacent cells, merges the two cells. 301 /// create the merged cell and append it to the mergecells collection. 302 /// when two cells are merged, only the content from one cell is preserved: 303 /// the upper-left cell for left-to-right languages or the upper-right cell for right-to-left languages. 304 /// </summary> 305 /// <param name="mergecells"></param> 306 /// <param name="cell1name"></param> 307 /// <param name="cell2name"></param> 308 public static void mergetwocells(this mergecells mergecells, string cell1name, string cell2name) 309 { 310 mergecell mergecell = new mergecell() { reference = new stringvalue(cell1name + ":" + cell2name) }; 311 mergecells.append(mergecell); 312 } 313 314 public static ienumerable<string> getitemvalues(this sharedstringtablepart sharestringpart) 315 { 316 foreach (var item in sharestringpart.sharedstringtable.elements<sharedstringitem>()) 317 { 318 if (item.text != null) 319 { 320 //code to take the string value 321 yield return item.text.text; 322 } 323 else if (item.innertext != null) 324 { 325 yield return item.innertext; 326 } 327 else if (item.innerxml != null) 328 { 329 yield return item.innerxml; 330 } 331 else 332 { 333 yield return null; 334 } 335 }; 336 } 337 public static xmldocument getcellassociatedsharedstringitemxmldocument(this sheetdata sheetdata, string columnname, uint rowindex, sharedstringtablepart sharestringpart) 338 { 339 cell cell = getcell(sheetdata, columnname, rowindex); 340 if (cell == null) 341 return null; 342 if (cell.datatype == cellvalues.sharedstring) 343 { 344 int id = -1; 345 if (int32.tryparse(cell.innertext, out id)) 346 { 347 sharedstringitem ssi = sharestringpart.getitem(id); 348 var doc = new xmldocument(); 349 doc.loadxml(ssi.outerxml); 350 return doc; 351 } 352 } 353 return null; 354 } 355 } 356 }
2.插入数据
1 private static void generateexcel() 2 { 3 using (memorystream mem = new memorystream()) 4 { 5 using (var temp = file.openread(@"e:\template.xlsx")) 6 { 7 temp.copyto(mem); 8 } 9 10 using (spreadsheetdocument doc = spreadsheetdocument.open(mem, true)) 11 { 12 workbookpart wbpart = doc.workbookpart; 13 worksheet worksheet = wbpart.worksheetparts.first().worksheet; 14 //statement to get the sheetdata which contains the rows and cell in table 15 sheetdata sheetdata = worksheet.getfirstchild<sheetdata>(); 16 17 sharedstringtablepart sharestringpart; 18 if (wbpart.getpartsoftype<sharedstringtablepart>().any()) 19 sharestringpart = wbpart.getpartsoftype<sharedstringtablepart>().first(); 20 else 21 sharestringpart = wbpart.addnewpart<sharedstringtablepart>(); 22 //假设模板第一行是title不用动,把要用到的样式都定义在了第二行的单元格里 23 var secondrow = sheetdata.getrow(2); 24 uint[] linestyles = secondrow.elements<cell>().select(c => c.styleindex.value).toarray(); 25 sheetdata.removechild(secondrow); 26 //从第二行开始循环插入4列1000数据 27 uint currentrowindex = 2; 28 for (int i = 0;i<1000;i++) 29 { 30 row row = new row(); 31 row.rowindex = currentrowindex;//设置行号 32 row.appendchild(new cell().setvalue(1, sharestringpart, styleindex: linestyles[0])); 33 row.appendchild(new cell().setvalue(datetime.now, sharestringpart, styleindex: linestyles[1])); 34 row.appendchild(new cell().setvalue(3.1415926535, sharestringpart, styleindex: linestyles[2])); 35 row.appendchild(new cell().setvalue("通商宽衣", sharestringpart, styleindex: linestyles[3]));//这里慢 36 sheetdata.appendchild(row); 37 currentrowindex++; 38 } 39 wbpart.workbook.save(); 40 doc.saveas($@"e:\temp_{datetime.now.tostring("yymmddhhmm")}.xlsx"); 41 doc.close(); 42 } 43 mem.close(); 44 } 45 }
以上就生成了一个excel打开不会报任何格式错误提示的标准.xlsx文件;但有需要优化的地方:在每次插入字符串的时候会去循环共享字符集,调用sharestringpart.getitemvalues().toarray()可以将集合全部存到数组或dictionary<string,int>里面会快很多,如果清楚集合内容就不用去判断重复了,当然也可以简单粗暴的保存为cellvalues.inlinestring,这样在重复字符串比较多的时候两种方式所生成的文件大小会有很大差异。
3.快速遍历
1 public static void read() 2 { 3 using (var sd = spreadsheetdocument.open(@"e:\temp.xlsx", false)) 4 { 5 workbookpart wbpart = sd.workbookpart; 6 sharedstringtablepart sharestringpart; 7 if (wbpart.getpartsoftype<sharedstringtablepart>().count() > 0) 8 sharestringpart = wbpart.getpartsoftype<sharedstringtablepart>().first(); 9 else 10 sharestringpart = wbpart.addnewpart<sharedstringtablepart>(); 11 string[] sharestringitemvalues = sharestringpart.getitemvalues().toarray(); 12 13 worksheetpart worksheetpart = wbpart.worksheetparts.first(); 14 uint datarowstart = 2;//数据开始行 15 openxmlreader reader = openxmlreader.create(worksheetpart); 16 while (reader.read()) 17 { 18 if (reader.elementtype == typeof(worksheet)) 19 { 20 reader.readfirstchild(); 21 } 22 23 if (reader.elementtype == typeof(row)) 24 { 25 row r = (row)reader.loadcurrentelement(); 26 if (r.rowindex < datarowstart) 27 continue; 28 foreach (cell c in r.elements<cell>()) 29 { 30 if (c.cellreference != null && c.cellreference.hasvalue) 31 { 32 string cv = c.getvalue(sharestringitemvalues); 33 console.writeline(cv); 34 if (c.cellreference.value == "b" + r.rowindex) 35 console.writeline("刚读取的是b列"); 36 } 37 } 38 } 39 } 40 sd.close(); 41 } 42 }
4.总结
documentformat.openxml不友好但操作透明,使用前最好先自行封装下,习惯之后相信能用的很爽。
以上です。
推荐阅读
-
JAVA使用POI(XSSFWORKBOOK)读取EXCEL文件过程解析
-
【C#常用方法】2.DataTable(或DataSet)与Excel文件之间的导出与导入(使用NPOI)
-
使用Numpy读取CSV文件,并进行行列删除的操作方法
-
python中使用 xlwt 操作excel的常见方法与问题
-
使用Python和xlwt向Excel文件中写入中文的实例
-
Python使用xlrd模块操作Excel数据导入的方法
-
Java使用POI操作Excel
-
Python使用文件操作实现一个XX信息管理系统
-
Linux下如何使用ftp命令对文件传输进行操作
-
使用DocumentFormat.OpenXml操作Excel文件.xlsx