C#网站生成静态页面的实例讲解

程序员文章站 2023-12-12 21:11:34

在一些需要经常更新页面数据的网站中，访问量不大时一般都是直接发布带后台代码的动态页面，每次访问都要与数据库交互。但是一旦访问量增加，这些服务器开销带来的成本就必须考虑进来了，...

在一些需要经常更新页面数据的网站中，访问量不大时一般都是直接发布带后台代码的动态页面，每次访问都要与数据库交互。但是一旦访问量增加，这些服务器开销带来的成本就必须考虑进来了。比如文章类页面，后台编辑后文章内容存入数据库，如果有1000人访问，而每次都去读取数据库，那这1000次的IO访问开销就显得比较大了。一个好的方法是：文章确定之后做成静态页面，并且由程序来完成这件事——递归遍历整个网站，把网站内容都访问一遍，生成这些页面的静态文本页面，再将这些页面发布。这样对浏览者而言，他看到的还是同一个地址、同一份文章，只是这份是静态的而已。这样就提升了网站的效率，节约了资源。

下面附上一份用C#遍历网站内容并生成静态内容页面的代码：

// URLs whose static page has already been written. A hash set gives constant-time membership checks.
    private HashSet<string> htmlcreatedlist = new HashSet<string>();
    /// <summary>
    /// Generates static HTML files for a page and for every page reachable from it.
    /// Each page is downloaded, written to the path returned by tophysicalpath, and then
    /// scanned for links; every link is prefixed with webconfiginfo.urlprefix and processed
    /// in turn. URLs already recorded in htmlcreatedlist are skipped, so each page is
    /// written once.
    /// </summary>
    /// <remarks>
    /// Pages are visited in the same depth-first order as a recursive walk, but the pending
    /// URLs are kept on an explicit stack so that a long chain of links cannot overflow the
    /// call stack. Exceptions raised while downloading or writing a page are not caught
    /// here and abort the whole walk.
    /// </remarks>
    /// <param name="urlstring">Absolute URL of the page to start from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="urlstring"/> is null.</exception>
    public void savehtmlcode(string urlstring)
    {
      if (urlstring == null)
      {
        throw new ArgumentNullException("urlstring");
      }

      Stack<string> pendingurls = new Stack<string>();
      pendingurls.Push(urlstring);

      while (pendingurls.Count > 0)
      {
        string currenturl = pendingurls.Pop();
        if (htmlcreatedlist.Contains(currenturl))
        {
          continue;
        }

        string htmlcode = gethtmlcodefromurl(currenturl);
        string htmlpath = currenturl.tophysicalpath();

        // CreateDirectory does nothing when the directory already exists.
        Directory.CreateDirectory(Path.GetDirectoryName(htmlpath));
        File.WriteAllText(htmlpath, htmlcode);

        // Marked only after a successful write, so a page that failed can be retried by a later call.
        htmlcreatedlist.Add(currenturl);

        List<string> linkedurls = new List<string>();
        foreach (string link in geturllinkfromhtmlcode(htmlcode))
        {
          // Each entry is the whole matched attribute (for example href="/news/list"), so
          // strip the attribute name and the quotes. The extraction is case-insensitive and
          // accepts single or double quotes, so the clean-up has to do the same.
          string linkedurl = Regex.Replace(link, "href\\s*=\\s*", "", RegexOptions.IgnoreCase);
          linkedurl = linkedurl.Replace("\"", "").Replace("'", "");
          linkedurl = linkedurl.Replace("\\", "/");
          linkedurls.Add(webconfiginfo.urlprefix + linkedurl);
        }

        // Push in reverse so that the first link on the page is the next one processed.
        for (int i = linkedurls.Count - 1; i >= 0; i--)
        {
          pendingurls.Push(linkedurls[i]);
        }
      }
    }
    /// <summary>
    /// Downloads the HTML of a page through an HttpWebRequest.
    /// </summary>
    /// <remarks>
    /// The response body is always decoded as UTF-8. Exceptions raised by the request or
    /// while reading the response are not caught and propagate to the caller.
    /// </remarks>
    /// <param name="urlstring">URL of the page to download.</param>
    /// <returns>The HTML text returned for the URL.</returns>
    private string gethtmlcodefromurl(string urlstring)
    {
      HttpWebRequest hwrequest = (HttpWebRequest)WebRequest.Create(urlstring);
      // UserAgent holds only the header value; the header name must not be repeated inside
      // it, and the product comment needs its closing parenthesis.
      hwrequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
      hwrequest.Accept = "*/*";
      hwrequest.KeepAlive = true;
      hwrequest.Headers.Add("accept-language", "zh-cn,en-us;q=0.5");

      // using blocks release the response, stream and reader even when reading throws.
      using (HttpWebResponse hwresponse = (HttpWebResponse)hwrequest.GetResponse())
      using (Stream streamresponse = hwresponse.GetResponseStream())
      using (StreamReader readerofstream = new StreamReader(streamresponse, System.Text.Encoding.GetEncoding("utf-8")))
      {
        return readerofstream.ReadToEnd();
      }
    }
    // Matches an href attribute whose quoted value contains no quote, '.', '#' or ':'.
    // Links to files with an extension, fragment links and URLs with a scheme are therefore
    // not matched. Built once instead of on every call.
    private static readonly Regex hreflinkregex = new Regex("href\\s*=\\s*(?:[\"'](?<1>[^\"'.#:]*)[\"'])", RegexOptions.IgnoreCase);

    /// <summary>
    /// Extracts the distinct hyperlink attributes from HTML text.
    /// </summary>
    /// <param name="htmlcode">HTML text to scan; null or empty yields an empty result.</param>
    /// <returns>
    /// The full text of every distinct matched href attribute, including the attribute name
    /// and the surrounding quotes, with HTML-escaped ampersands decoded.
    /// </returns>
    private IEnumerable<string> geturllinkfromhtmlcode(string htmlcode)
    {
      if (string.IsNullOrEmpty(htmlcode))
      {
        return Enumerable.Empty<string>();
      }

      return hreflinkregex.Matches(htmlcode)
        .Cast<Match>()
        // Attribute values in HTML write '&' as "&amp;"; decode it so the link is a usable URL.
        .Select(match => match.Value.Replace("&amp;", "&"))
        .Distinct()
        .ToList();
    }
  }

另外还给 string 扩展了一个方法 tophysicalpath，用于把页面地址转换成对应静态文件的物理路径：

/// <summary>
    /// Maps a page URL to the physical path of its static HTML file under the "html" folder
    /// of the application root.
    /// </summary>
    /// <remarks>
    /// Every segment of the URL path becomes a directory. A URL without a query string maps
    /// to "index.html" in that directory; otherwise the query keys and values are
    /// concatenated, with the '?', '&amp;' and '=' separators removed, to form the file name.
    /// For example "/news/show?id=5" maps to "html\news\show\id5.html".
    /// NOTE(review): reads System.Web.HttpContext.Current, so this presumably has to run
    /// inside a web request - confirm for any background caller.
    /// </remarks>
    /// <param name="urlstring">Absolute URL of the page; it is parsed with System.Uri.</param>
    /// <returns>Full path of the static HTML file for the URL.</returns>
    public static string tophysicalpath(this string urlstring)
    {
      System.Uri uri = new System.Uri(urlstring);

      // Path.Combine inserts exactly one separator between parts. Collapsing doubled
      // backslashes after plain concatenation would also damage a UNC application root.
      string htmlpath = System.IO.Path.Combine(System.Web.HttpContext.Current.Request.PhysicalApplicationPath, "html");

      string[] segments = uri.AbsolutePath.Split(new char[] { '/', '\\' }, System.StringSplitOptions.RemoveEmptyEntries);
      foreach (string segment in segments)
      {
        // Relative segments must never move the result outside the "html" folder.
        if (segment == "." || segment == "..")
        {
          continue;
        }
        htmlpath = System.IO.Path.Combine(htmlpath, SanitizeFileName(segment));
      }

      string[] querys = uri.Query.Split(new char[] { '?', '&', '=' }, System.StringSplitOptions.RemoveEmptyEntries);
      string filename = querys.Length == 0
        ? "index.html"
        : SanitizeFileName(string.Join(string.Empty, querys)) + ".html";

      return System.IO.Path.Combine(htmlpath, filename);
    }

    // Replaces every character that is not allowed in a file name (path separators included)
    // with '_', so URL text can only ever name a single file or directory.
    private static string SanitizeFileName(string name)
    {
      foreach (char invalid in System.IO.Path.GetInvalidFileNameChars())
      {
        name = name.Replace(invalid, '_');
      }
      return name;
    }

总结

以上就是这篇文章的全部内容了，希望本文的内容对大家的学习或者工作具有一定的参考价值，谢谢大家的支持。如果你想了解更多相关内容，请查看下面的相关链接。