欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

C# 远程 HTML 数据抓取实例分享

程序员文章站 2024-02-17 23:52:52
复制代码 代码如下:///         /// 获取远程h...

复制代码 代码如下:

/// <summary>
        /// Fetches a remote document synchronously through the MSXML2 XMLHTTP COM object.
        /// </summary>
        /// <param name="url">Target URL. For GET requests, <paramref name="param"/> is appended as a query string.</param>
        /// <param name="methed">HTTP method; it is lower-cased before use, so "GET" and "get" are equivalent.</param>
        /// <param name="param">URL-encoded form data. May be null or empty.</param>
        /// <param name="html">Receives the response text on success, or an error message on failure.</param>
        /// <returns>true when the request reached readyState 4; false on any failure.</returns>
        public static bool gethttp(string url, string methed, string param, out string html)
        {
            // A null method is normalized to "" so that the failure surfaces through the
            // catch block below instead of a NullReferenceException escaping this method.
            methed = (methed ?? string.Empty).ToLowerInvariant();
            bool hasParam = !string.IsNullOrEmpty(param);

            if (hasParam && methed == "get")
            {
                // Extend an existing query string with "&" instead of adding a second "?".
                bool hasQuery = url != null && url.IndexOf('?') >= 0;
                url += (hasQuery ? "&" : "?") + param;
            }

            try
            {
                MSXML2.XMLHTTP mx = new MSXML2.XMLHTTPClass();

                // Third argument false = synchronous call; no credentials are supplied.
                mx.open(methed, url, false, null, null);

                if (hasParam && methed == "post")
                {
                    // Content-Length is intentionally not set by hand: XMLHTTP computes it
                    // from the encoded body, whereas param.Length counts UTF-16 characters
                    // and is wrong for any non-ASCII payload.
                    mx.setRequestHeader("content-type", "application/x-www-form-urlencoded");
                }

                // For GET the parameters already travel in the URL, so no body is sent.
                mx.send(methed == "get" ? null : param);

                if (mx.readyState != 4)
                {
                    html = "远程连接失败:-4";
                    return false;
                }

                // NOTE(review): the HTTP status code is not inspected here, so 4xx/5xx
                // responses are still reported as success, exactly as before.
                html = mx.responseText;
                return true;
            }
            catch (Exception ex)
            {
                html = "远程连接失败:" + ex.Message;
                return false;
            }
        }

        public static bool gethttp1(string url, string methed, string param, string referer, string encode, out string html)
        {
            //return gethttp(url,methed,param,out html);

            //string encode = "utf-8";
            //string methed = sendtype.tostring();

            if (param != null && methed == "get" && param.length > 0)
            {
                if (url.indexof("?") >= 0)
                {
                    url += "&" + param;
                }
                else
                {
                    url += "?" + param;
                }
            }

            try
            {
                httpwebrequest webreq = (httpwebrequest)webrequest.create(url);

                webreq.proxy=null;
                webreq.timeout = 1000 * 6;
                webreq.contenttype = "application/x-www-form-urlencoded";
                webreq.useragent = "user-agent:mozilla/5.0 (windows nt 6.1; wow64; rv:24.0) gecko/20100101 firefox/24.0";

                //webreq.useragent = "mozilla/4.0 (compatible; msie 7.0; windows nt 6.1; wow64; trident/6.0; slcc2; .net clr 2.0.50727; .net clr 3.5.30729; .net clr 3.0.30729; media center pc 6.0; .net4.0c; .net4.0e)";

                //谷歌的:user-agent:mozilla/5.0 (windows nt 6.1; wow64) applewebkit/537.36 (khtml, like gecko) chrome/28.0.1500.95 safari/537.36
                //火狐的:user-agent:mozilla/5.0 (windows nt 6.1; wow64; rv:24.0) gecko/20100101 firefox/24.0
                //标准格式为: 浏览器标识 (操作系统标识; 加密等级标识; 浏览器语言) 渲染引擎标识 版本信息

                //webreq.allowautoredirect = false;

                //频繁请求一个网址时,过段时间就会出现“基础连接已经关闭”
                //webreq.keepalive = false;
                //webreq.protocolversion = httpversion.version10;

                if (referer.length > 0)
                {
                    webreq.referer = referer;
                }

                cookiecontainer mycookies = new cookiecontainer();
                webreq.cookiecontainer = mycookies;

                //if (this.cookielist != null)
                //{
                //    webreq.cookiecontainer.add(this.getcookies(webreq.requesturi, this.cookielist));
                //}

                webreq.method = methed;

                //post 开始
                if (param != null && methed == "post")
                {
                    byte[] arrbyte = encoding.getencoding(encode).getbytes(param);
                    webreq.contentlength = arrbyte.length;

                    stream newstream = webreq.getrequeststream();
                    newstream.write(arrbyte, 0, arrbyte.length);
                    newstream.close();
                }
                //post 结束

 
                webresponse w = webreq.getresponse();

                //返回html
                using (httpwebresponse webres = (httpwebresponse)webreq.getresponse())
                {
                    using (stream datastream = webres.getresponsestream())
                    {
                        using (streamreader reader = new streamreader(datastream, encoding.getencoding(encode)))
                        {
                            html = reader.readtoend();
                            //this.cookielist = webreq.cookiecontainer.getcookies(webreq.requesturi);
                            webreq.abort();//可能会解决卡住或阻塞问题
                        }
                    }
                }
            }
            catch (exception ex)
            {

                html = "出现异常(httphelper.gethtml),远程连接失败:" + ex.message + " url:" + url;
                //system.windows.forms.messagebox.show(html);
                return false;
            }

            return true;
        }