使用NPOI读取Word、Excel文档内容

程序员文章站 2023-12-16 11:09:04

使用NPOI读取Excel的例子很多，读取Word的例子不多。Excel的解析方式有多种，可以使用ODBC查询，把Excel作为一个数据集对待。也可以使用文档结构模型的...

使用NPOI读取Excel的例子很多，读取Word的例子不多。

Excel的解析方式有多种：可以使用ODBC查询，把Excel作为一个数据集对待；也可以使用文档结构模型的方式进行解析，即解析Workbook（工作簿）、Sheet、Row、Column。

Word的解析比较复杂，因为Word的文档结构模型定义较为复杂。解析Word或者Excel，关键是理解Word、Excel的文档对象模型。

Word、Excel文档对象模型的解析，可以通过COM接口调用，此类方式使用较广。（可以先录制宏代码，再将其改写为对应语言的代码）

也可以使用XML模型解析，尤其适用于2007、2010版本的文档。

using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Text;
using NPOI.POIFS.FileSystem;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.XWPF.UserModel;

namespace eyuan
{
  /// <summary>
  /// Helpers that extract the textual content of Excel workbooks and Word
  /// documents through the NPOI XSSF / XWPF object models.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the namespace, class, method and parameter names are all
  /// lower-case, which is unusual for C#. They are deliberately left untouched
  /// so that existing references keep resolving; confirm the intended casing.
  /// </remarks>
  public static class nopihandler
  {
    /// <summary>
    /// Reads every sheet of an Excel workbook into nested lists of cell text.
    /// </summary>
    /// <param name="filename">Path of the workbook file to open for reading.</param>
    /// <returns>
    /// One list per sheet, each holding one list per row, each holding the
    /// <c>ToString()</c> text of the cells stored in that row. An empty list is
    /// returned when the workbook cannot be opened (the failure is logged).
    /// </returns>
    public static List<List<List<string>>> readexcel(string filename)
    {
      List<List<List<string>>> workbookContent = new List<List<List<string>>>();

      XSSFWorkbook workbook = OpenWorkbook(filename);
      if (workbook == null)
      {
        // The open failure has already been logged; continuing would only
        // dereference null, so hand back an empty result instead.
        return workbookContent;
      }

      // Sheet indexes start at 0.
      for (int sheetIndex = 0; sheetIndex < workbook.NumberOfSheets; sheetIndex++)
      {
        workbookContent.Add(ReadSheetRows(workbook.GetSheetAt(sheetIndex)));
      }

      return workbookContent;
    }

    /// <summary>
    /// Flattens an Excel workbook into a single string, joining cells, rows and
    /// sheets with the separators configured in the application settings
    /// (<c>excelcellseparator</c>, <c>excelrowseparator</c>, <c>excelsheetseparator</c>).
    /// </summary>
    /// <param name="filename">Path of the workbook file to open for reading.</param>
    /// <returns>
    /// The concatenated cell text; every row is followed by the row separator
    /// and every sheet by the sheet separator.
    /// </returns>
    public static string readexceltext(string filename)
    {
      string cellSeparator = ConfigurationManager.AppSettings["excelcellseparator"];
      string rowSeparator = ConfigurationManager.AppSettings["excelrowseparator"];
      string sheetSeparator = ConfigurationManager.AppSettings["excelsheetseparator"];

      StringBuilder text = new StringBuilder();
      foreach (List<List<string>> sheet in readexcel(filename))
      {
        foreach (List<string> row in sheet)
        {
          text.Append(string.Join(cellSeparator, row.ToArray()));
          text.Append(rowSeparator);
        }
        text.Append(sheetSeparator);
      }

      return text.ToString();
    }

    /// <summary>
    /// Extracts the text of a Word document. Headers, footers, tables and
    /// embedded pictures are each included only when the matching application
    /// setting (<c>capturewordheader</c>, <c>capturewordfooter</c>,
    /// <c>capturewordtable</c>, <c>capturewordimage</c>) equals <c>"true"</c>;
    /// body paragraphs are always included.
    /// </summary>
    /// <param name="filename">Path of the document file to open for reading.</param>
    /// <returns>
    /// The captured text, each section wrapped in "capture ... begin/end" marker
    /// lines. An empty string is returned when the document cannot be opened
    /// (the failure is logged).
    /// </returns>
    public static string readwordtext(string filename)
    {
      string cellSeparator = ConfigurationManager.AppSettings["wordtablecellseparator"];
      string rowSeparator = ConfigurationManager.AppSettings["wordtablerowseparator"];
      string tableSeparator = ConfigurationManager.AppSettings["wordtableseparator"];

      bool captureHeader = ConfigurationManager.AppSettings["capturewordheader"] == "true";
      bool captureFooter = ConfigurationManager.AppSettings["capturewordfooter"] == "true";
      bool captureTable = ConfigurationManager.AppSettings["capturewordtable"] == "true";
      bool captureImage = ConfigurationManager.AppSettings["capturewordimage"] == "true";

      // Format string for the path each extracted picture is written to;
      // {0} receives the generated file name.
      string imageFileNameFormat = ConfigurationManager.AppSettings["capturewordimagefilename"];

      XWPFDocument document = OpenDocument(filename);
      if (document == null)
      {
        // The open failure has already been logged; nothing can be extracted.
        return string.Empty;
      }

      StringBuilder text = new StringBuilder();

      // Headers
      if (captureHeader)
      {
        text.AppendLine("capture header begin");
        foreach (XWPFHeader header in document.HeaderList)
        {
          text.AppendLine(header.Text);
        }
        text.AppendLine("capture header end");
      }

      // Footers
      if (captureFooter)
      {
        text.AppendLine("capture footer begin");
        foreach (XWPFFooter footer in document.FooterList)
        {
          text.AppendLine(footer.Text);
        }
        text.AppendLine("capture footer end");
      }

      // Tables
      if (captureTable)
      {
        text.AppendLine("capture table begin");
        foreach (XWPFTable table in document.Tables)
        {
          foreach (XWPFTableRow row in table.Rows)
          {
            foreach (XWPFTableCell cell in row.GetTableCells())
            {
              text.Append(cell.GetText());
              text.Append(cellSeparator);
            }
            text.Append(rowSeparator);
          }
          text.Append(tableSeparator);
        }
        text.AppendLine("capture table end");
      }

      // Pictures: each one is written to disk and its path is recorded.
      if (captureImage)
      {
        text.AppendLine("capture image begin");
        foreach (XWPFPictureData picture in document.AllPictures)
        {
          string extension = picture.SuggestFileExtension();
          // The embedded name comes from the document itself, so keep only its
          // file-name component before using it to build an output path.
          string pictureName = Path.GetFileName(picture.GetFileName());
          byte[] pictureBytes = picture.GetData();

          // The GUID prefix keeps pictures with the same embedded name from
          // overwriting each other.
          // NOTE(review): string.Format throws if the capturewordimagefilename
          // setting is missing - confirm it is always configured when
          // capturewordimage is enabled.
          string outputPath = string.Format(
            imageFileNameFormat,
            Guid.NewGuid().ToString() + "_" + pictureName + "." + extension);

          File.WriteAllBytes(outputPath, pictureBytes);
          text.AppendLine(outputPath);
        }
        text.AppendLine("capture image end");
      }

      // Body paragraphs
      text.AppendLine("capture paragraph begin");
      foreach (XWPFParagraph paragraph in document.Paragraphs)
      {
        text.AppendLine(paragraph.ParagraphText);
      }
      text.AppendLine("capture paragraph end");

      return text.ToString();
    }

    /// <summary>
    /// Collects the cell text of every row stored in <paramref name="sheet"/>.
    /// </summary>
    private static List<List<string>> ReadSheetRows(ISheet sheet)
    {
      List<List<string>> sheetContent = new List<List<string>>();
      if (sheet.PhysicalNumberOfRows == 0)
      {
        return sheetContent;
      }

      // Walk the logical row range rather than 0..PhysicalNumberOfRows-1:
      // a sheet with blank rows has gaps where GetRow returns null, and its
      // last stored row lies beyond the physical row count.
      for (int rowIndex = sheet.FirstRowNum; rowIndex <= sheet.LastRowNum; rowIndex++)
      {
        IRow row = sheet.GetRow(rowIndex);
        if (row == null)
        {
          continue;
        }

        // Rows may hold different numbers of cells.
        List<string> rowContent = new List<string>();
        foreach (ICell cell in row.Cells)
        {
          rowContent.Add(cell == null ? "nil" : cell.ToString());
        }
        sheetContent.Add(rowContent);
      }

      return sheetContent;
    }

    /// <summary>
    /// Opens <paramref name="filename"/> as an XSSF workbook; logs the error and
    /// returns null when that fails.
    /// </summary>
    private static XSSFWorkbook OpenWorkbook(string filename)
    {
      try
      {
        using (FileStream file = new FileStream(filename, FileMode.Open, FileAccess.Read))
        {
          return new XSSFWorkbook(file);
        }
      }
      catch (Exception e)
      {
        LogOpenFailure(filename, e);
        return null;
      }
    }

    /// <summary>
    /// Opens <paramref name="filename"/> as an XWPF document; logs the error and
    /// returns null when that fails.
    /// </summary>
    private static XWPFDocument OpenDocument(string filename)
    {
      try
      {
        using (FileStream file = new FileStream(filename, FileMode.Open, FileAccess.Read))
        {
          return new XWPFDocument(file);
        }
      }
      catch (Exception e)
      {
        LogOpenFailure(filename, e);
        return null;
      }
    }

    /// <summary>
    /// Writes the "file could not be opened" message to the project log.
    /// </summary>
    private static void LogOpenFailure(string filename, Exception e)
    {
      loghandler.logwrite(string.Format("文件{0}打开失败，错误：{1}", filename, e.ToString()));
    }
  }
}

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持。

使用NOPI读取Word、Excel文档内容

使用NOPI读取Word、Excel文档内容

Java使用poi包读取Excel文档代码分享

C#使用NOPI库实现导入Excel文档

Ubuntu下使用python读取doc和docx文档的内容方法

C#使用NOPI库实现导入Excel文档

使用Excel中的插入对象功能在Excel中插入Word文档

使用Excel中的插入对象功能在Excel中插入Word文档

使用rand函数在Word文档中快速产生模拟文字内容

使用python批量读取word文档并整理关键信息到excel表格的实例

使用VBA读取指定Excel文件指定Sheet指定单元格的内容