C# 远程 HTML 数据抓取实例分享
程序员文章站
2024-02-17 23:52:52
本文分享两个用 C# 获取远程 HTML 的方法:基于 MSXML2.XMLHTTP 的 gethttp,以及基于 HttpWebRequest 的 gethttp1。
复制代码 代码如下:
/// <summary>
/// Fetches a remote page synchronously through the MSXML2 XMLHTTP COM object.
/// </summary>
/// <param name="url">Address to request. For GET requests a non-empty <paramref name="param"/> is appended as the query string.</param>
/// <param name="methed">HTTP method name; it is lower-cased before being compared with "get" / "post" and handed to the COM object.</param>
/// <param name="param">Query string (GET) or form-encoded body (POST). May be null or empty.</param>
/// <param name="html">Receives the response text on success, or a failure message on error.</param>
/// <returns>true when the response text was read; false when the request did not reach readyState 4 or an exception was caught.</returns>
public static bool gethttp(string url, string methed, string param, out string html)
{
    try
    {
        // Normalised inside the try block so that a null method is reported through
        // the same failure path (false + message) as every other error.
        methed = methed.ToLowerInvariant();
        bool hasParam = !string.IsNullOrEmpty(param);

        if (hasParam && methed == "get")
        {
            // Use '&' when the url already carries a query string, '?' otherwise.
            bool hasQuery = url != null && url.IndexOf('?') >= 0;
            url += (hasQuery ? "&" : "?") + param;
        }

        MSXML2.XMLHTTP mx = new MSXML2.XMLHTTPClass();
        // Third argument is the async flag; false makes send() block until done.
        mx.open(methed, url, false, null, null);

        if (hasParam && methed == "post")
        {
            // Content-Length is a byte count, not a character count.
            // NOTE(review): assumes MSXML transmits a string body as UTF-8 - confirm.
            int bodyBytes = System.Text.Encoding.UTF8.GetByteCount(param);
            mx.setRequestHeader("content-length", bodyBytes.ToString());
            mx.setRequestHeader("content-type", "application/x-www-form-urlencoded");
        }

        mx.send(param);

        // readyState 4 means the request has completed.
        if (mx.readyState != 4)
        {
            html = "远程连接失败:-4";
            return false;
        }

        html = mx.responseText;
        return true;
    }
    catch (Exception ex)
    {
        html = "远程连接失败:" + ex.Message;
        return false;
    }
}
/// <summary>
/// Fetches a remote page with <see cref="HttpWebRequest"/>, decoding the response
/// with the caller-supplied encoding.
/// </summary>
/// <param name="url">Address to request. For GET requests a non-empty <paramref name="param"/> is appended as the query string.</param>
/// <param name="methed">HTTP method name; compared case-insensitively with "get" / "post".</param>
/// <param name="param">Query string (GET) or form-encoded body (POST). May be null.</param>
/// <param name="referer">Value for the Referer header; null or empty means no Referer is sent.</param>
/// <param name="encode">Encoding name used for both the POST body and the response text (e.g. "utf-8").</param>
/// <param name="html">Receives the response text on success, or a failure message on error.</param>
/// <returns>true when the response was read; false when an exception was caught.</returns>
public static bool gethttp1(string url, string methed, string param, string referer, string encode, out string html)
{
    // Compare the method case-insensitively so that "POST" and "post" both send
    // the body; the original value is still what gets assigned to the request.
    string verb = methed == null ? null : methed.ToLowerInvariant();

    if (!string.IsNullOrEmpty(param) && verb == "get")
    {
        // Use '&' when the url already carries a query string, '?' otherwise.
        url += (url.IndexOf("?") >= 0 ? "&" : "?") + param;
    }

    try
    {
        HttpWebRequest webreq = (HttpWebRequest)WebRequest.Create(url);
        webreq.Proxy = null;
        webreq.Timeout = 1000 * 6; // milliseconds
        webreq.ContentType = "application/x-www-form-urlencoded";
        // The header value must not contain the header name itself.
        // Usual layout: browser id (OS id; encryption level; language) rendering engine, version.
        webreq.UserAgent = "mozilla/5.0 (windows nt 6.1; wow64; rv:24.0) gecko/20100101 firefox/24.0";

        // Author's note: when one address is requested very frequently, "the underlying
        // connection was closed" may start to appear after a while; disabling KeepAlive
        // or forcing HTTP/1.0 were the workarounds the author experimented with.

        if (!string.IsNullOrEmpty(referer))
        {
            webreq.Referer = referer;
        }

        webreq.CookieContainer = new CookieContainer();
        webreq.Method = methed;

        // Write the request body for POST.
        if (param != null && verb == "post")
        {
            byte[] body = Encoding.GetEncoding(encode).GetBytes(param);
            webreq.ContentLength = body.Length;
            // using guarantees the request stream is closed even if Write throws.
            using (Stream requestStream = webreq.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }
        }

        // Read the response body as text.
        using (HttpWebResponse webres = (HttpWebResponse)webreq.GetResponse())
        using (Stream datastream = webres.GetResponseStream())
        using (StreamReader reader = new StreamReader(datastream, Encoding.GetEncoding(encode)))
        {
            html = reader.ReadToEnd();
            // Author's note: aborting here may help with requests that hang or block.
            webreq.Abort();
        }
    }
    catch (Exception ex)
    {
        html = "出现异常(httphelper.gethtml),远程连接失败:" + ex.Message + " url:" + url;
        return false;
    }

    return true;
}
上一篇: 搭建java WEB开发环境和应用
推荐阅读
-
c#远程html数据抓取实例分享
-
java抓取网页数据获取网页中所有的链接实例分享
-
分享php代码将360浏览器导出的favdb的sqlite数据库文件转换为html_php实例
-
Python使用urllib2模块抓取HTML页面资源的实例分享
-
C#抓取网页数据 解析标题描述图片等信息 去除HTML标签
-
Python使用urllib2模块抓取HTML页面资源的实例分享
-
C#抓取网页数据 解析标题描述图片等信息 去除HTML标签
-
分享php代码将360浏览器导出的favdb的sqlite数据库文件转换为html_php实例
-
分享PHP源码批量抓取远程网页图片并保存到本地的实现方法_php实例
-
html中通过JS获取JSON数据并加载实例分享