使用NPOI读取Word、Excel文档内容
程序员文章站
2023-12-19 11:31:16
使用NPOI读取Excel的例子很多,读取Word的例子不多。
Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的...
使用NPOI读取Excel的例子很多,读取Word的例子不多。
Excel的解析方式有多种:可以使用ODBC查询,把Excel作为一个数据集对待;也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。
Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。
Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广(可以先录制宏代码,再将其改写为对应的语言)。
也可以使用XML模型解析,尤其适用于2007、2010版本的文档。
using NPOI.POIFS.FileSystem;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.XWPF.UserModel;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Text;

namespace eyuan
{
    /// <summary>
    /// Helpers that extract the textual content of .xlsx workbooks and .docx documents via NPOI.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the casing of the project-local identifiers (eyuan, nopihandler, readexcel,
    /// readexceltext, readwordtext, loghandler.logwrite) is kept exactly as it appears in the
    /// listing; confirm it against the real declarations. Framework and NPOI identifiers use
    /// their canonical casing, without which the listing cannot compile.
    /// </remarks>
    public static class nopihandler
    {
        /// <summary>
        /// Reads every sheet of an .xlsx workbook into nested lists: workbook -> sheet -> row -> cell text.
        /// </summary>
        /// <param name="filename">Path of the workbook to open (read-only).</param>
        /// <returns>
        /// One list per sheet, each holding one list per defined row, each holding the
        /// <c>ToString()</c> text of the row's defined cells. An empty list is returned when the
        /// workbook cannot be opened; the failure is reported through <c>loghandler.logwrite</c>.
        /// </returns>
        public static List<List<List<string>>> readexcel(string filename)
        {
            List<List<List<string>>> workbookContent = new List<List<List<string>>>();

            // Open the workbook. On failure, log and return the empty result instead of
            // dereferencing a null workbook further down.
            XSSFWorkbook workbook;
            try
            {
                using (FileStream file = new FileStream(filename, FileMode.Open, FileAccess.Read))
                {
                    workbook = new XSSFWorkbook(file);
                }
            }
            catch (Exception e)
            {
                loghandler.logwrite(string.Format("文件{0}打开失败,错误:{1}", filename, e));
                return workbookContent;
            }

            // Sheet indexes start at 0.
            for (int sheetIndex = 0; sheetIndex < workbook.NumberOfSheets; sheetIndex++)
            {
                ISheet sheet = workbook.GetSheetAt(sheetIndex);
                List<List<string>> sheetContent = new List<List<string>>();

                // Walk the logical row range and skip rows that were never defined.
                // Indexing 0..PhysicalNumberOfRows-1 breaks on sheets with gaps: GetRow returns
                // null for an undefined row, and rows past the physical count are never visited.
                for (int rowIndex = 0; rowIndex <= sheet.LastRowNum; rowIndex++)
                {
                    IRow row = sheet.GetRow(rowIndex);
                    if (row == null)
                    {
                        continue;
                    }

                    // Rows may have different numbers of cells; row.Cells holds only the defined ones.
                    List<string> rowContent = new List<string>();
                    foreach (ICell cell in row.Cells)
                    {
                        rowContent.Add(cell == null ? "nil" : cell.ToString());
                    }

                    sheetContent.Add(rowContent);
                }

                workbookContent.Add(sheetContent);
            }

            return workbookContent;
        }

        /// <summary>
        /// Flattens an .xlsx workbook into a single string using separators taken from appSettings.
        /// </summary>
        /// <param name="filename">Path of the workbook to open.</param>
        /// <returns>
        /// Cell texts joined with the "excelcellseparator" setting; the "excelrowseparator" setting
        /// is appended after every row and the "excelsheetseparator" setting after every sheet.
        /// </returns>
        public static string readexceltext(string filename)
        {
            string excelCellSeparator = ConfigurationManager.AppSettings["excelcellseparator"];
            string excelRowSeparator = ConfigurationManager.AppSettings["excelrowseparator"];
            string excelSheetSeparator = ConfigurationManager.AppSettings["excelsheetseparator"];

            StringBuilder fileText = new StringBuilder();

            foreach (List<List<string>> sheetContent in readexcel(filename))
            {
                foreach (List<string> rowContent in sheetContent)
                {
                    fileText.Append(string.Join(excelCellSeparator, rowContent.ToArray()));
                    fileText.Append(excelRowSeparator);
                }

                fileText.Append(excelSheetSeparator);
            }

            return fileText.ToString();
        }

        /// <summary>
        /// Reads the content of a .docx document as text.
        /// </summary>
        /// <remarks>
        /// Headers, footers, tables and embedded pictures are each captured only when the matching
        /// appSettings flag ("capturewordheader", "capturewordfooter", "capturewordtable",
        /// "capturewordimage") equals "true". Body paragraphs are always captured. Every section
        /// is wrapped in "capture ... begin" / "capture ... end" marker lines.
        /// When pictures are captured, each one is written to disk under a name built by passing
        /// "{guid}_{file name}.{extension}" to the "capturewordimagefilename" setting as a
        /// <c>string.Format</c> pattern, and that path is appended to the output.
        /// </remarks>
        /// <param name="filename">Path of the document to open (read-only).</param>
        /// <returns>
        /// The captured text, or an empty string when the document cannot be opened; the failure
        /// is reported through <c>loghandler.logwrite</c>.
        /// </returns>
        public static string readwordtext(string filename)
        {
            string wordTableCellSeparator = ConfigurationManager.AppSettings["wordtablecellseparator"];
            string wordTableRowSeparator = ConfigurationManager.AppSettings["wordtablerowseparator"];
            string wordTableSeparator = ConfigurationManager.AppSettings["wordtableseparator"];

            string captureWordHeader = ConfigurationManager.AppSettings["capturewordheader"];
            string captureWordFooter = ConfigurationManager.AppSettings["capturewordfooter"];
            string captureWordTable = ConfigurationManager.AppSettings["capturewordtable"];
            string captureWordImage = ConfigurationManager.AppSettings["capturewordimage"];

            string captureWordImageFileName = ConfigurationManager.AppSettings["capturewordimagefilename"];

            StringBuilder fileText = new StringBuilder();

            #region Open document
            // On failure, log and return instead of dereferencing a null document further down.
            XWPFDocument document;
            try
            {
                using (FileStream file = new FileStream(filename, FileMode.Open, FileAccess.Read))
                {
                    document = new XWPFDocument(file);
                }
            }
            catch (Exception e)
            {
                loghandler.logwrite(string.Format("文件{0}打开失败,错误:{1}", filename, e));
                return string.Empty;
            }
            #endregion

            #region Headers and footers
            if (captureWordHeader == "true")
            {
                fileText.AppendLine("capture header begin");
                foreach (XWPFHeader header in document.HeaderList)
                {
                    fileText.AppendLine(header.Text);
                }
                fileText.AppendLine("capture header end");
            }

            if (captureWordFooter == "true")
            {
                fileText.AppendLine("capture footer begin");
                foreach (XWPFFooter footer in document.FooterList)
                {
                    fileText.AppendLine(footer.Text);
                }
                fileText.AppendLine("capture footer end");
            }
            #endregion

            #region Tables
            if (captureWordTable == "true")
            {
                fileText.AppendLine("capture table begin");
                foreach (XWPFTable table in document.Tables)
                {
                    foreach (XWPFTableRow row in table.Rows)
                    {
                        foreach (XWPFTableCell cell in row.GetTableCells())
                        {
                            fileText.Append(cell.GetText());
                            fileText.Append(wordTableCellSeparator);
                        }
                        fileText.Append(wordTableRowSeparator);
                    }
                    fileText.Append(wordTableSeparator);
                }
                fileText.AppendLine("capture table end");
            }
            #endregion

            #region Pictures
            if (captureWordImage == "true")
            {
                fileText.AppendLine("capture image begin");
                foreach (XWPFPictureData pictureData in document.AllPictures)
                {
                    string picExtName = pictureData.SuggestFileExtension();
                    string picFileName = pictureData.GetFileName();
                    byte[] picFileContent = pictureData.GetData();

                    // The GUID prefix keeps pictures from different documents from colliding.
                    string picTempName = string.Format(
                        captureWordImageFileName,
                        Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                    // Creates or overwrites the target file and closes it when done.
                    File.WriteAllBytes(picTempName, picFileContent);

                    fileText.AppendLine(picTempName);
                }
                fileText.AppendLine("capture image end");
            }
            #endregion

            // Body paragraphs are always captured.
            fileText.AppendLine("capture paragraph begin");
            foreach (XWPFParagraph paragraph in document.Paragraphs)
            {
                fileText.AppendLine(paragraph.ParagraphText);
            }
            fileText.AppendLine("capture paragraph end");

            return fileText.ToString();
        }
    }
}
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。