C#如何解析http报文

程序员文章站 2023-11-12 15:37:40

下面通过一段内容有文字说明有代码分析，并附有展示图供大家学习。要解析http报文，需要实现以下操作：读取http报头提供的各种属性分析属性值，从中获取内容编码和...

下面通过文字说明和代码分析介绍如何用C#解析HTTP报文，并附有效果图供大家参考。

要解析http报文，需要实现以下操作：

读取http报头提供的各种属性
分析属性值，从中获取内容编码和字符集编码
将报头数据和内容进行分离
判断内容是文本还是二进制，如果是二进制则不进行处理
如果内容是文本，按报头中提供的内容编码和字符集编码进行解压缩和解码
目前没有找到.NET框架内置的解析方法。理论上HttpClient等类在内部应该已经实现了解析，但不知为何没有公开这些处理方法（也可能是我没找到）。

那么只能自己来解析这些数据了。

我们先来看看这个经过gzip压缩的文本内容的http报文：

C#如何解析http报文

这里提供一个老外写的简陋的解析类（已经过修改，原代码中存在一些严重bug）：

/// <summary>
/// HTTP header fields recognised by the parser.
/// </summary>
/// <remarks>
/// Each member's numeric value is used as an index into the parsed-field array,
/// and its name is turned into the on-the-wire field name by replacing '_' with '-'.
/// Values are assigned implicitly in declaration order starting at 0, so members
/// must not be reordered or inserted in the middle.
/// </remarks>
public enum httpheaderfield
{
    // 0 - 9
    accept = 0,
    accept_charset,
    accept_encoding,
    accept_language,
    accept_ranges,
    authorization,
    cache_control,
    connection,
    cookie,
    content_length,

    // 10 - 19
    content_type,
    date,
    expect,
    from,
    host,
    if_match,
    if_modified_since,
    if_none_match,
    if_range,
    if_unmodified_since,

    // 20 - 29
    max_forwards,
    pragma,
    proxy_authorization,
    range,
    referer,
    te,
    upgrade,
    user_agent,
    via,
    warn,

    // 30 - 39
    age,
    allow,
    content_encoding,
    content_language,
    content_location,
    content_disposition,
    content_md5,
    content_range,
    etag,
    expires,

    // 40 - 49
    last_modified,
    location,
    proxy_authenticate,
    refresh,
    retry_after,
    server,
    set_cookie,
    trailer,
    transfer_encoding,
    vary,

    // 50 - 51
    warning,
    www_authenticate
}
/// <summary>
/// Splits a raw HTTP message into its known header fields and its body bytes.
/// </summary>
/// <remarks>
/// The header section is everything before the first blank line (CR LF CR LF);
/// the bytes after it are exposed unchanged through <see cref="data"/>.
/// Only fields listed in <c>httpheaderfield</c> are extracted, and only the first
/// occurrence of each one is kept.
/// </remarks>
class httpheader
{
    #region properties
    // One slot per httpheaderfield member, indexed by the member's numeric value.
    // A slot stays null when the field is absent from the message.
    // Assumes the enum values are contiguous and start at 0.
    private string[] m_strhttpfield = new string[Enum.GetValues(typeof(httpheaderfield)).Length];

    // Body bytes; empty until a body has been separated from the header.
    private byte[] m_bytedata = new byte[0];

    /// <summary>Parsed field values, indexed by <c>(int)httpheaderfield</c>.</summary>
    public string[] httpfield
    {
        get { return m_strhttpfield; }
        set { m_strhttpfield = value; }
    }

    /// <summary>Raw bytes that follow the header section (not decompressed or decoded).</summary>
    public byte[] data
    {
        get { return m_bytedata; }
        set { m_bytedata = value; }
    }
    #endregion

    // Sequence that separates the header section from the body.
    private const string HeaderTerminator = "\r\n\r\n";

    // ASCII yields exactly one char per input byte, so an index found in the
    // decoded string is also a valid offset into the original byte array.
    private readonly System.Text.ASCIIEncoding encoding = new System.Text.ASCIIEncoding();

    #region constructors
    /// <summary>
    /// Default constructor - not used.
    /// </summary>
    private httpheader()
    { }

    /// <summary>
    /// Parses <paramref name="bytehttprequest"/> as an HTTP request or response.
    /// </summary>
    /// <param name="bytehttprequest">Complete raw message bytes (start line, header fields, optional body).</param>
    /// <exception cref="ArgumentNullException"><paramref name="bytehttprequest"/> is null.</exception>
    public httpheader(byte[] bytehttprequest)
    {
        if (bytehttprequest == null)
        {
            throw new ArgumentNullException(nameof(bytehttprequest));
        }

        string message = encoding.GetString(bytehttprequest);
        int headerEnd = message.IndexOf(HeaderTerminator, StringComparison.Ordinal);

        string header;
        if (headerEnd < 0)
        {
            // No blank line: the whole message is header text and there is no body.
            header = message;
            data = new byte[0];
        }
        else
        {
            header = message.Substring(0, headerEnd);

            int bodyStart = headerEnd + HeaderTerminator.Length;
            byte[] body = new byte[bytehttprequest.Length - bodyStart];
            Array.Copy(bytehttprequest, bodyStart, body, 0, body.Length);
            data = body;
        }

        httpheaderparse(header);
    }
    #endregion

    #region methods
    /// <summary>
    /// Fills <see cref="httpfield"/> with the value of every known field found in <paramref name="header"/>.
    /// </summary>
    /// <param name="header">Header section of the message, without the terminating blank line.</param>
    private void httpheaderparse(string header)
    {
        foreach (httpheaderfield field in Enum.GetValues(typeof(httpheaderfield)))
        {
            // The leading '\n' anchors the match at the start of a line, which keeps
            // "cookie" from matching inside "set-cookie". The colon is included so a
            // field name cannot match a longer name that merely starts the same way.
            string marker = "\n" + field.ToString().Replace('_', '-') + ":";

            // Field names are case-insensitive on the wire.
            int markerStart = header.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
            if (markerStart < 0)
            {
                continue;
            }

            int valueStart = markerStart + marker.Length;
            int lineEnd = header.IndexOf('\n', valueStart);

            string rawValue = lineEnd < 0
                ? header.Substring(valueStart)
                : header.Substring(valueStart, lineEnd - valueStart);

            // Trim removes the optional space after the colon and the trailing '\r'.
            m_strhttpfield[(int)field] = rawValue.Trim();
        }
    }
    #endregion
}

编写以下代码以实现解析文件：

/// <summary>
/// Console driver: repeatedly asks for a file containing a raw HTTP message,
/// parses it with <c>httpheader</c> and prints the recognised fields and the body.
/// </summary>
class program
{
    /// <summary>
    /// Prompts for file paths until standard input is closed. Any error while
    /// reading or parsing a file is printed and the prompt is shown again.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        while (true)
        {
            Console.WriteLine("输入待解析的http报文数据文件完整路径：");
            string filename = Console.ReadLine();
            if (filename == null)
            {
                // Standard input reached end-of-stream; stop instead of looping forever.
                return;
            }

            try
            {
                // ReadAllBytes opens the file read-only and always closes it,
                // even when reading fails part-way through.
                byte[] bytes = File.ReadAllBytes(filename);
                var header = new httpheader(bytes);

                for (int x = 0; x < header.httpfield.Length; x++)
                {
                    string f = header.httpfield[x];
                    if (!string.IsNullOrEmpty(f))
                    {
                        Console.WriteLine($"[{x:00}] - {(httpheaderfield) x} : {f}");
                    }
                }

                Console.WriteLine($"总数据尺寸{bytes.Length}字节，实际数据尺寸{header.data.Length}字节");
                // The body is printed as UTF-8 without any decompression or charset detection.
                Console.WriteLine(Encoding.UTF8.GetString(header.data));
                Console.WriteLine();
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
    }
}

这里还未实现gzip解压缩和字符集解码，报文内容是直接按UTF-8解码后输出的。（需要时再写吧，都是体力活儿~）

效果图展示：

C#如何解析http报文