欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

asp.net HttpWebRequest自动识别网页编码

程序员文章站 2022-04-29 12:58:02
复制代码 代码如下:static string getencoding(string url) { httpwebrequest request = null; httpw...

复制代码 代码如下:

/// <summary>
/// Best-effort detection of the character-set name used by the page at
/// <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the page to inspect.</param>
/// <returns>
/// The first <c>charset=...</c> value found in the response body; otherwise the
/// character set reported by the response; otherwise
/// <c>Encoding.Default.BodyName</c>. The default is also returned when the
/// status is not 200 OK, when the declared Content-Length is 1 MiB or more, or
/// when any exception occurs.
/// </returns>
static string getencoding(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 20000;
        request.AllowAutoRedirect = false;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // NOTE: ContentLength is -1 when the server sends no Content-Length
            // header, so such responses are not limited by this size guard.
            if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= 1024 * 1024)
                return Encoding.Default.BodyName;

            Stream body = response.GetResponseStream();
            // The static string.Equals tolerates a null ContentEncoding.
            if (string.Equals(response.ContentEncoding, "gzip", StringComparison.InvariantCultureIgnoreCase))
                body = new GZipStream(body, CompressionMode.Decompress);

            string html;
            // Only the ASCII "charset=..." token is of interest, so decode as
            // ASCII in both the gzip and the plain case.
            using (StreamReader reader = new StreamReader(body, Encoding.ASCII))
            {
                html = reader.ReadToEnd();
            }

            // Accept quoted, single-quoted and unquoted values, e.g.
            //   <meta charset="utf-8">
            //   content="text/html; charset=utf-8"
            // and require at least one character so an empty value is never
            // reported as a match.
            Match match = Regex.Match(
                html,
                @"charset\b\s*=\s*[""']?(?<charset>[^""'\s;/>]+)",
                RegexOptions.IgnoreCase);
            if (match.Success)
                return match.Groups["charset"].Value;

            // CharacterSet may be null as well as empty.
            if (!string.IsNullOrEmpty(response.CharacterSet))
                return response.CharacterSet;
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: any network, protocol or decompression
        // failure falls through to the default encoding name below.
    }

    return Encoding.Default.BodyName;
}

/// <summary>
/// Downloads the page at <paramref name="url"/> and returns its source text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="encoding">Encoding used to decode the response body.</param>
/// <returns>
/// The decoded response body, or an empty string when the status is not
/// 200 OK, when the declared Content-Length is 1 MiB or more, or when any
/// exception occurs.
/// </returns>
static string gethtml(string url, Encoding encoding)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 20000;
        request.AllowAutoRedirect = false;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // NOTE: ContentLength is -1 when the server sends no Content-Length
            // header, so such responses are not limited by this size guard.
            if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= 1024 * 1024)
                return string.Empty;

            Stream body = response.GetResponseStream();
            // The static string.Equals tolerates a null ContentEncoding.
            if (string.Equals(response.ContentEncoding, "gzip", StringComparison.InvariantCultureIgnoreCase))
                body = new GZipStream(body, CompressionMode.Decompress);

            // Disposing the reader also disposes the stream(s) it wraps, and
            // it happens before the enclosing response is disposed.
            using (StreamReader reader = new StreamReader(body, encoding))
            {
                return reader.ReadToEnd();
            }
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: any network, protocol, decompression or
        // argument failure yields an empty result instead of propagating.
    }

    return string.Empty;
}