C#模拟用户登录,数据抓取包括分页爬取插入数据库中
程序员文章站
2022-10-07 08:18:29
//涉及到的dll,Winista.HtmlParser 获取表单数据 using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Text.RegularExp... ......
//涉及到的dll,Winista.HtmlParser 获取表单数据
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using Topevery.DUM.Reported.Entity;
using Topevery.DUM.ReportedService.Code;
using System.Data;
using Topevery.DUM.GridingManagement.Entity.Event;
using Topevery.DUM.GridingManagement.Logic;
using Topevery.DUM.GridingManagement.Entity;
using Topevery.Framework.MonitorServices;
using Topevery.DUM.ZhaQuServer._12345CrawlAdd;
using System.Diagnostics;
using Winista.Text.HtmlParser;
using Winista.Text.HtmlParser.Data;
using Winista.Text.HtmlParser.Extractors;
using Winista.Text.HtmlParser.Filters;
using Winista.Text.HtmlParser.Http;
using Winista.Text.HtmlParser.Lex;
using Winista.Text.HtmlParser.Nodes;
using Winista.Text.HtmlParser.Scanners;
using Winista.Text.HtmlParser.Support;
using Winista.Text.HtmlParser.Tags;
using Winista.Text.HtmlParser.Util;
using Winista.Text.HtmlParser.Visitors;

namespace Topevery.DUM.ZhaQuServer.Code
{
    /// <summary>
    /// Scrapes work orders from the remote reply-list pages (<c>replylist.aspx</c> /
    /// <c>replyadv.aspx</c>) and passes every order that is not yet present in the local
    /// event table to <c>_2345CrawlAdd.StartEvents</c> / <c>StartEvents_New</c>.
    /// </summary>
    /// <remarks>
    /// Every authenticated fetch performs its own login round trip with a fresh cookie
    /// container (see <see cref="FetchWithLogin"/>); no session is shared between fetches.
    /// </remarks>
    public class HtmlPagerParser
    {
        // ------------------------------------------------------------------ endpoints
        private const string LoginUrl = "http://113.107.142.14:12345/login.aspx";
        private const string ReplyListUrl = "http://113.107.142.14:12345/app/Deal/Pages/replylist.aspx";
        private const string ReplyDetailUrlFormat = "http://113.107.142.14:12345/app/Deal/Pages/replyadv.aspx?id={0}";
        private const string ReplyPageUrlFormat = "http://113.107.142.14:12345/app/Deal/Pages/replylist.aspx?page={0}&year={1}";

        // ------------------------------------------------------------------ paging
        // The page-count detection (GetTotalPageCount) is an unfinished stub, so the
        // total is a fixed value and at most MaxPagesPerRun pages are scanned per run.
        private const int TotalPageCount = 90;
        private const int MaxPagesPerRun = 30;

        // ------------------------------------------------------------------ detail parsing
        // After markup stripping the detail text still contains literal "&nbsp;" entities;
        // every field label on the page is followed (or, for the mailbox label, preceded)
        // by one, and the labels below rely on that.
        private const string Nbsp = "&nbsp;";

        // Callers read indices 0..10 of the detail record, which requires at least six
        // table rows because the record is sized as (row count + 5).
        private const int MinDetailRows = 6;

        private static readonly Regex FormTableRegex =
            new Regex("(?<=<table class='moform'[^>]*>)[\\s\\S]*?(?=</table>)");

        private static readonly Regex TableRowRegex =
            new Regex("(?<=<tr[^>]*>)[\\s\\S]*?(?=</tr>)");

        // Removes tags and all whitespace. "\s*" can match the empty string at any
        // position, so any alternative placed after it would never be tried.
        private static readonly Regex MarkupRegex =
            new Regex("(<.+?>)|(\\s*)", RegexOptions.IgnoreCase);

        // SECURITY NOTE(review): the account, password and the captured
        // __VIEWSTATE/__EVENTVALIDATION values are hard-coded in source. They should be
        // moved to protected configuration; left in place here so behaviour is unchanged.
        // An array (not a Dictionary) is used so the posted field order is guaranteed.
        private static readonly KeyValuePair<string, string>[] LoginFormFields =
        {
            new KeyValuePair<string, string>("__EVENTTARGET", "btnLogin"),
            new KeyValuePair<string, string>("__EVENTARGUMENT", ""),
            new KeyValuePair<string, string>("__VIEWSTATE", "/wEPDwUKMTQ3ODc3MDg4Nw9kFgICAw9kFgICAw8PZBYCHgV2YWx1ZQUgOUJGMzk5RjUyQTJDN0JFMDMyNURDQUREMDY3RTA4REVkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBQdjYlRydXN0JK18zzTQsBhXx3/E/THqcjXiDMtvMA5/xVWK6vwjVZk="),
            new KeyValuePair<string, string>("__EVENTVALIDATION", "/wEWBgL7/qTPBwL64ITXCAKT/Zj2BALg0rm0AwLR5qxWAoLch4YMUR4smh/9EOUQ9f0Eg0KWSFu1KOyQeWNxVFQei1Jfb6w="),
            new KeyValuePair<string, string>("txbAccount", "admin_pjcg"),
            new KeyValuePair<string, string>("txbPassword", "pjcg.12345"),
            new KeyValuePair<string, string>("cbTrust", "on"),
            new KeyValuePair<string, string>("rmk", "9BF399F52A2C7BE0325DCADD067E08DE")
        };

        // ------------------------------------------------------------------ state
        private readonly _2345CrawlAdd _12345;
        private readonly CookieContainer cc;

        // Work-order number most recently seen by RecursionHtmlNode; the following
        // "所属流程" label is attached to it.
        private string lastKey = "";

        // Work-order number -> workflow text, filled while walking a list page.
        private readonly Dictionary<string, string> dicList = new Dictionary<string, string>();

        /// <summary>
        /// Creates the parser and requests the list page once. Failures are logged, not thrown.
        /// </summary>
        public HtmlPagerParser()
        {
            this.cc = new CookieContainer();
            this._12345 = new _2345CrawlAdd();
            try
            {
                this.Login();
            }
            catch (Exception exception)
            {
                LogHelper.Log.Error(exception);
            }
        }

        /// <summary>
        /// Creates the parser and requests the list page once. Failures are logged, not thrown.
        /// </summary>
        /// <param name="login_name">User name; only used in the log message (see <see cref="Login(string, string)"/>).</param>
        /// <param name="pwss_word">Password; currently unused.</param>
        public HtmlPagerParser(string login_name, string pwss_word)
        {
            this.cc = new CookieContainer();
            this._12345 = new _2345CrawlAdd();
            try
            {
                this.Login(login_name, pwss_word);
                LogHelper.Log.Debug(string.Format("用户:{0} 登录成功。", login_name));
            }
            catch (Exception exception)
            {
                LogHelper.Log.Error(exception);
            }
        }

        /// <summary>
        /// Maps an <c>EventInPara</c> onto a new <c>AcceptParameter</c>.
        /// Not called from within this class.
        /// </summary>
        private AcceptParameter BuildEntityFrom12345(EventInPara para)
        {
            AcceptParameter parameter = new AcceptParameter();
            parameter.Desc = string.Format("{0} {1}", para.EventTitle, para.EventDescript);
            parameter.Reporter = para.ClientName;
            parameter.TelNum = string.Format("{0},{1}", para.Phone1, para.Phone2);
            parameter.DbCreateDate = new DateTime?(para.EventProcessTime);
            parameter.ReplyType = 1;
            parameter.Event_id = para.EventID.ToString();
            parameter.ReplyWay = para.Phone1;
            return parameter;
        }

        /// <summary>
        /// Downloads the detail page of one work order and extracts its fields.
        /// </summary>
        /// <param name="id">Work-order number as found on the list page.</param>
        /// <returns>
        /// An array of (row count + 5) entries, of which indices 0..10 are filled:
        /// 0 reporter name, 1 transfer time, 2 phone 1, 3 phone 2, 4 phone 3,
        /// 5 and 6 transfer time again, 7 work-order number, 8 phone 1 again,
        /// 9 matter content, 10 reply name followed by "!".
        /// Returns <c>null</c> when the page is empty or cannot be parsed; the reason is logged.
        /// </returns>
        private string[] Get12345ContentDetail(string id)
        {
            try
            {
                string html = ReturnHtml(id);
                if (string.IsNullOrEmpty(html))
                {
                    return null;
                }

                // Table 0 holds the basic ticket data, table 2 the matter, table 4 the reply.
                MatchCollection tables = FormTableRegex.Matches(html);
                if (tables.Count < 5)
                {
                    throw new FormatException(string.Format(
                        "Detail page for '{0}' contains {1} 'moform' tables; at least 5 are required.",
                        id, tables.Count));
                }

                string basicHtml = tables[0].Value;
                int rowCount = TableRowRegex.Matches(basicHtml).Count;
                if (rowCount < MinDetailRows)
                {
                    throw new FormatException(string.Format(
                        "Detail page for '{0}' has {1} rows in its first table; at least {2} are required.",
                        id, rowCount, MinDetailRows));
                }

                string basic = StripMarkup(basicHtml);
                string matter = StripMarkup(tables[2].Value);
                string reply = StripMarkup(tables[4].Value);

                // The reply name is the 4th ']'-delimited part of the 4th ';'-delimited segment.
                string[] replySegments = reply.Split(';');
                if (replySegments.Length < 4)
                {
                    throw new FormatException(string.Format(
                        "Reply table for '{0}' has an unexpected layout (';' segments: {1}).",
                        id, replySegments.Length));
                }

                string[] replyParts = replySegments[3].Split('】');
                if (replyParts.Length < 4)
                {
                    throw new FormatException(string.Format(
                        "Reply table for '{0}' has an unexpected layout ('】' parts: {1}).",
                        id, replyParts.Length));
                }

                string reporter = Between(basic, "服务对象姓名" + Nbsp, "转办时间" + Nbsp);
                string transferTime = Between(basic, "转办时间" + Nbsp, "联系电话1" + Nbsp);
                string phone1 = Between(basic, "联系电话1" + Nbsp, "联系电话2" + Nbsp);
                string phone2 = Between(basic, "联系电话2" + Nbsp, "联系电话3" + Nbsp);
                string phone3 = Between(basic, "联系电话3" + Nbsp, Nbsp + "邮箱地址");
                string orderNumber = Between(basic, "工单编号" + Nbsp, "服务人员工号" + Nbsp);
                string content = Between(matter, "事项内容" + Nbsp, "事项附件");

                string[] fields = new string[rowCount + 5];
                fields[0] = reporter;
                fields[1] = transferTime;
                fields[2] = phone1;
                fields[3] = phone2;
                fields[4] = phone3;
                fields[5] = transferTime;
                fields[6] = transferTime;
                fields[7] = orderNumber;
                fields[8] = phone1;
                fields[9] = content;
                fields[10] = replyParts[3] + "!";
                return fields;
            }
            catch (Exception ex)
            {
                LogHelper.Log.Error(ex.Message, ex);
            }

            return null;
        }

        /// <summary>
        /// Removes tags and whitespace from an HTML fragment. Character entities such as
        /// <c>&amp;nbsp;</c> are left in place.
        /// </summary>
        private static string StripMarkup(string html)
        {
            return MarkupRegex.Replace(html, "");
        }

        /// <summary>
        /// Returns the text between the end of the first <paramref name="startLabel"/> and the
        /// start of the first <paramref name="endLabel"/>.
        /// </summary>
        /// <exception cref="FormatException">A label is missing or the labels are out of order.</exception>
        private static string Between(string text, string startLabel, string endLabel)
        {
            int labelAt = text.IndexOf(startLabel, StringComparison.Ordinal);
            int endAt = text.IndexOf(endLabel, StringComparison.Ordinal);
            if (labelAt < 0 || endAt < 0)
            {
                throw new FormatException(string.Format(
                    "Label '{0}' was not found in the detail text.",
                    labelAt < 0 ? startLabel : endLabel));
            }

            int valueAt = labelAt + startLabel.Length;
            if (endAt < valueAt)
            {
                throw new FormatException(string.Format(
                    "Label '{0}' appears before '{1}' in the detail text.", endLabel, startLabel));
            }

            return text.Substring(valueAt, endAt - valueAt);
        }

        /// <summary>
        /// Returns whether the local event table already has a row for the work order.
        /// </summary>
        private static bool EventExists(DataTable existingEvents, string eventId)
        {
            // The id is scraped text: double any single quote so it cannot break out of
            // the string literal in the filter expression.
            string filter = string.Format("c_event_id='{0}'", eventId.Replace("'", "''"));
            return existingEvents.Select(filter).Length > 0;
        }

        /// <summary>
        /// Reads the un-paged list page and inserts every work order that is not yet in
        /// the local event table via <c>StartEvents</c>.
        /// </summary>
        public void Get12345Data()
        {
            Dictionary<string, string> codes = this.Get12345ProjectCodes();
            if (codes == null)
            {
                // The list page could not be fetched or parsed; details were logged there.
                LogHelper.Log.Error("获取案件列表失败,本次不处理");
                return;
            }

            List<string> ids = codes.Keys.ToList<string>();
            DataTable existingEvents = this._12345.GetEvent_id();

            List<string> newIds = new List<string>();
            foreach (string eventId in ids)
            {
                if (string.IsNullOrEmpty(eventId))
                {
                    continue;
                }

                if (EventExists(existingEvents, eventId))
                {
                    LogHelper.Log.Debug(string.Format("案件ID{0}的案件已存在系统中", eventId));
                }
                else
                {
                    newIds.Add(eventId);
                }
            }

            foreach (string eventId in newIds)
            {
                try
                {
                    string[] detail = this.Get12345ContentDetail(eventId);
                    if (detail == null)
                    {
                        continue;
                    }

                    LogHelper.Log.Info("获取内容页数据成功!");
                    LogHelper.Log.Info(string.Format(
                        "案件信息:{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10}",
                        detail[0], detail[1], detail[2], detail[3], detail[4], detail[5],
                        detail[6], detail[7], detail[8], detail[9], detail[10]));
                    this._12345.StartEvents(
                        detail[0], detail[1], detail[2], detail[3], detail[4], detail[5],
                        detail[6], detail[7], detail[8], detail[9], detail[10]);
                    LogHelper.Log.Info("3插入案件成功");
                }
                catch (Exception exception)
                {
                    LogHelper.Log.Error("插入案件异常:" + exception);
                }
            }

            // The figure is the number of new ids attempted, not the number inserted.
            LogHelper.Log.Info(newIds.Count == 0
                ? DateTime.Now + "现在没有案件更新"
                : DateTime.Now + " 案件更新" + newIds.Count + "条");
        }

        /// <summary>
        /// Scans the first pages of the paged list and inserts every work order that is not
        /// yet in the local event table via <c>StartEvents_New</c>.
        /// </summary>
        /// <param name="TargetId">Passed through to <c>StartEvents_New</c> and written to the log.</param>
        /// <param name="DistrictID">Passed through to <c>StartEvents_New</c>.</param>
        /// <param name="StreetID">Passed through to <c>StartEvents_New</c>.</param>
        public void Get12345Data(Guid TargetId, Guid DistrictID, Guid StreetID)
        {
            // NOTE(review): the returned page is not used, and every later fetch logs in
            // again on its own. The call is retained because it makes this method throw
            // immediately when the site is unreachable - confirm whether it can be dropped.
            ReturnHtml();

            int pagesToScan = Math.Min(MaxPagesPerRun, TotalPageCount);
            int insertedTotal = 0;

            for (int page = 0; page < pagesToScan; page++)
            {
                // Get12345ProjectCodes(int) fills and returns the shared dicList,
                // so it is emptied before each page.
                dicList.Clear();
                List<string> ids = Get12345ProjectCodes(page).Keys.ToList<string>();

                // Re-read per page so orders inserted for earlier pages are seen.
                DataTable existingEvents = _12345.GetEvent_id();

                List<string> newIds = new List<string>();
                for (int i = 0; i < ids.Count; i++)
                {
                    string eventId = ids[i];
                    if (string.IsNullOrEmpty(eventId))
                    {
                        continue;
                    }

                    if (EventExists(existingEvents, eventId))
                    {
                        LogHelper.Log.Debug(string.Format(
                            "案件ID{0}的案件已存在系统中,第{1},总循环数{2},获取页面数{3}",
                            eventId, i, ids.Count, page));
                    }
                    else
                    {
                        newIds.Add(eventId);
                        LogHelper.Log.Debug(string.Format(
                            "案件ID{0}将进入系统,第{1},总循环数{2},获取页面数{3}",
                            eventId, i, ids.Count, page));
                    }
                }

                LogHelper.Log.Info("每次获取的案件ID集合数量" + newIds.Count);

                foreach (string eventId in newIds)
                {
                    try
                    {
                        string[] detail = Get12345ContentDetail(eventId);
                        if (detail == null)
                        {
                            continue;
                        }

                        LogHelper.Log.Debug("1获取内容页数据成功!");
                        insertedTotal++;
                        LogHelper.Log.Debug(string.Format(
                            "案件信息:{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10};目标环节的办理者Id:{11}",
                            detail[0], detail[1], detail[2], detail[3], detail[4], detail[5],
                            detail[6], detail[7], detail[8], detail[9], detail[10], TargetId));
                        _12345.StartEvents_New(
                            detail[0], detail[1], detail[2], detail[3], detail[4], detail[5],
                            detail[6], detail[7], detail[8], detail[9], detail[10],
                            TargetId, DistrictID, StreetID);
                        LogHelper.Log.Info("抓取案件数:" + insertedTotal + "。1插入数据库编号" + detail[7]);
                    }
                    catch (Exception ex)
                    {
                        LogHelper.Log.Error("插入案件异常:" + ex);
                    }
                }

                // The figure is the number of new ids attempted on this page.
                if (newIds.Count == 0)
                {
                    LogHelper.Log.Info(DateTime.Now + "现在没有案件更新");
                }
                else
                {
                    LogHelper.Log.Info(DateTime.Now + " 案件更新" + newIds.Count + "条");
                }
            }
        }

        /// <summary>
        /// Walks a parsed HTML tree (node, then its children, then - if requested - its
        /// following siblings) and records work-order numbers and their workflow text
        /// in <c>dicList</c>.
        /// </summary>
        /// <param name="htmlNode">Node to visit; <c>null</c> is ignored.</param>
        /// <param name="siblingRequired">When true, the node's following siblings are visited too.</param>
        private void RecursionHtmlNode(INode htmlNode, bool siblingRequired)
        {
            if (htmlNode == null)
            {
                return;
            }

            // Inspection is isolated so that a malformed label on this node cannot
            // prevent its children and siblings from being visited.
            try
            {
                CaptureOrderFields(htmlNode);
            }
            catch (Exception ex)
            {
                LogHelper.Log.Error(string.Format("获取内容页数据异常:{0}", ex));
            }

            try
            {
                if (htmlNode.Children != null && htmlNode.Children.Count > 0)
                {
                    RecursionHtmlNode(htmlNode.FirstChild, true);
                }

                if (siblingRequired)
                {
                    for (INode sibling = htmlNode.NextSibling; sibling != null; sibling = sibling.NextSibling)
                    {
                        RecursionHtmlNode(sibling, false);
                    }
                }
            }
            catch (Exception ex)
            {
                LogHelper.Log.Error(string.Format("获取内容页数据异常:{0}", ex));
            }
        }

        /// <summary>
        /// If <paramref name="htmlNode"/> is an opening <c>b</c> tag, reads the node that
        /// follows it: a "工单编号" label registers a new key (the text after the first ':'),
        /// a "所属流程" label stores its text under the most recent key.
        /// </summary>
        private void CaptureOrderFields(INode htmlNode)
        {
            ITag tag = htmlNode as ITag;
            if (tag == null || tag.IsEndTag())
            {
                return;
            }

            string tagName = tag.TagName;
            if (tagName == null || !string.Equals(tagName.Trim(), "b", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            INode label = htmlNode.NextSibling;
            if (label == null)
            {
                return;
            }

            string labelText = label.GetText();
            if (labelText == null)
            {
                return;
            }

            if (labelText.Contains("工单编号"))
            {
                // Throws when there is no ':'; the caller logs it and carries on.
                string key = label.ToPlainTextString().Split(new char[] { ':' })[1].Trim();
                lastKey = key;
                if (!dicList.ContainsKey(key))
                {
                    dicList.Add(key, "");
                }
            }

            if (labelText.Contains("所属流程") && dicList.ContainsKey(lastKey))
            {
                dicList[lastKey] = label.ToPlainTextString().Trim();
            }
        }

        /// <summary>
        /// Removes every <c>&lt;script ... &lt;/script&gt;</c> span from the markup.
        /// Matching is case-sensitive; an opening tag without a closing tag is left untouched.
        /// </summary>
        /// <param name="oldStr">HTML text; <c>null</c> or empty is returned unchanged.</param>
        /// <returns>The markup without script blocks.</returns>
        private string ReturnHtmlContent(string oldStr)
        {
            if (string.IsNullOrEmpty(oldStr))
            {
                return oldStr;
            }

            const string openTag = "<script";
            const string closeTag = "</script>";

            string html = oldStr;
            int start = html.IndexOf(openTag, StringComparison.Ordinal);
            while (start != -1)
            {
                // Look for the close tag after the open tag so a stray earlier
                // "</script>" cannot produce a negative removal length.
                int end = html.IndexOf(closeTag, start, StringComparison.Ordinal);
                if (end == -1)
                {
                    break;
                }

                html = html.Remove(start, end + closeTag.Length - start);
                start = html.IndexOf(openTag, StringComparison.Ordinal);
            }

            return html;
        }

        /// <summary>
        /// Strips scripts from a list page, parses it and walks every top-level node so
        /// that <c>dicList</c> is filled.
        /// </summary>
        private void CollectCodesFromListPage(string html)
        {
            string cleaned = ReturnHtmlContent(html);
            Lexer lexer = new Lexer(cleaned);
            Parser parser = new Parser(lexer);
            NodeList htmlNodes = parser.Parse(null);
            for (int i = 0; i < htmlNodes.Count; i++)
            {
                RecursionHtmlNode(htmlNodes[i], false);
            }
        }

        /// <summary>
        /// Reads the un-paged list page and collects its work-order numbers.
        /// </summary>
        /// <returns>The shared <c>dicList</c>, or <c>null</c> when the page was empty or parsing failed.</returns>
        /// <remarks>
        /// NOTE(review): the page is requested through <see cref="GetWebPageContent"/> with the
        /// instance cookie container, which never receives the login POST that
        /// <see cref="FetchWithLogin"/> performs - confirm the site serves this page without it.
        /// <c>dicList</c> is not cleared here, so keys from earlier calls are kept.
        /// </remarks>
        private Dictionary<string, string> Get12345ProjectCodes()
        {
            // GetWebPageContent never throws; it returns an empty string on failure.
            string webPageContent = this.GetWebPageContent(ReplyListUrl);
            if (string.IsNullOrEmpty(webPageContent))
            {
                return null;
            }

            try
            {
                CollectCodesFromListPage(webPageContent);
                return dicList;
            }
            catch (Exception exception)
            {
                LogHelper.Log.Error(string.Format("获取内容页数据异常:{0}", exception));
            }

            return null;
        }

        /// <summary>
        /// Reads one page of the paged list and collects its work-order numbers.
        /// </summary>
        /// <param name="PageCount">Page index sent as the <c>page</c> query parameter.</param>
        /// <returns>The shared <c>dicList</c> (never <c>null</c>), whatever was collected.</returns>
        private Dictionary<string, string> Get12345ProjectCodes(int PageCount)
        {
            try
            {
                string html = ReturnHtml(PageCount);
                if (string.IsNullOrEmpty(html))
                {
                    return dicList;
                }

                try
                {
                    CollectCodesFromListPage(html);
                }
                catch (Exception parseError)
                {
                    LogHelper.Log.Error(string.Format("获取内容页数据异常:{0}", parseError));
                }

                LogHelper.Log.Info(string.Format("获取的数据量:{0}", dicList.Count));
            }
            catch (Exception ex)
            {
                LogHelper.Log.Error(string.Format("抓取内容页异常:{0},当前页:{1}", ex, PageCount));
            }

            return dicList;
        }

        /// <summary>
        /// GETs <paramref name="url"/> using the instance cookie container.
        /// </summary>
        /// <returns>The response body, or an empty string when the request fails (the error is logged).</returns>
        private string GetWebPageContent(string url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Timeout = 500000;
                request.CookieContainer = this.cc;
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception exception)
            {
                LogHelper.Log.Error("请求内容页:" + exception);
                return string.Empty;
            }
        }

        /// <summary>
        /// Requests the list page once with the instance cookie container. No credentials are sent.
        /// </summary>
        private void Login()
        {
            string content = this.GetWebPageContent(ReplyListUrl);
            if (!string.IsNullOrEmpty(content))
            {
                LogHelper.Log.Info("获取列表页数据成功");
            }
        }

        /// <summary>
        /// Same as <see cref="Login()"/>; both parameters are currently ignored.
        /// </summary>
        private void Login(string login_name, string pwss_word)
        {
            this.Login();
        }

        /// <summary>
        /// Unfinished stub: fetches the list page and always reports 0 pages.
        /// Not called from within this class.
        /// </summary>
        /// <param name="totalPageCount">Set to 0 when the fetch succeeds; untouched otherwise.</param>
        private void GetTotalPageCount(ref int totalPageCount)
        {
            try
            {
                ReturnHtml();
                totalPageCount = 0;
            }
            catch (Exception ex)
            {
                LogHelper.Log.Error(string.Format("抓取内容页异常:{0}", ex));
            }
        }

        /// <summary>Logs in and returns the un-paged list page.</summary>
        private static string ReturnHtml()
        {
            return FetchWithLogin(ReplyListUrl);
        }

        /// <summary>Logs in and returns the detail page of one work order.</summary>
        /// <param name="id">Work-order number; URL-escaped before being placed in the query string.</param>
        private static string ReturnHtml(string id)
        {
            string escapedId = Uri.EscapeDataString(id ?? string.Empty);
            return FetchWithLogin(string.Format(ReplyDetailUrlFormat, escapedId));
        }

        /// <summary>Logs in and returns one page of the list for the current year.</summary>
        /// <param name="page">Page index sent as the <c>page</c> query parameter.</param>
        private static string ReturnHtml(int page)
        {
            return FetchWithLogin(string.Format(ReplyPageUrlFormat, page, DateTime.Now.Year));
        }

        /// <summary>
        /// Performs the login sequence with a fresh cookie container and then GETs
        /// <paramref name="dataUrl"/> with the cookies obtained.
        /// </summary>
        /// <returns>The body of the data page, decoded as UTF-8.</returns>
        /// <remarks>Network and protocol exceptions propagate to the caller.</remarks>
        private static string FetchWithLogin(string dataUrl)
        {
            CookieContainer session = new CookieContainer();

            // 1. Open the login page. The body is discarded: the posted form uses the
            //    captured __VIEWSTATE/__EVENTVALIDATION values, not values read from here.
            //    NOTE(review): no cookie container is attached, so this request appears to
            //    contribute nothing to the session - confirm before removing it.
            HttpWebRequest openLogin = (HttpWebRequest)WebRequest.Create(LoginUrl);
            openLogin.Method = "GET";
            openLogin.KeepAlive = false;
            openLogin.AllowAutoRedirect = false;
            ReadBody(openLogin);

            // 2. Submit the login form so the session cookies are stored.
            byte[] postData = Encoding.ASCII.GetBytes(BuildLoginBody());
            HttpWebRequest submitLogin = (HttpWebRequest)WebRequest.Create(LoginUrl);
            submitLogin.Method = "POST";
            submitLogin.KeepAlive = false;
            submitLogin.ContentType = "application/x-www-form-urlencoded";
            submitLogin.CookieContainer = session;
            submitLogin.ContentLength = postData.Length;
            submitLogin.AllowAutoRedirect = false;
            using (System.IO.Stream requestBody = submitLogin.GetRequestStream())
            {
                requestBody.Write(postData, 0, postData.Length);
            }
            ReadBody(submitLogin);

            // 3. Fetch the requested page with the session cookies.
            HttpWebRequest fetchData = (HttpWebRequest)WebRequest.Create(dataUrl);
            fetchData.Method = "GET";
            fetchData.KeepAlive = false;
            fetchData.CookieContainer = session;
            return ReadBody(fetchData);
        }

        /// <summary>
        /// Builds the URL-encoded login form body (<c>key=value&amp;</c> per field,
        /// including the trailing separator).
        /// </summary>
        private static string BuildLoginBody()
        {
            StringBuilder body = new StringBuilder();
            foreach (KeyValuePair<string, string> field in LoginFormFields)
            {
                body.Append(System.Web.HttpUtility.UrlEncode(field.Key));
                body.Append('=');
                body.Append(System.Web.HttpUtility.UrlEncode(field.Value));
                body.Append('&');
            }

            return body.ToString();
        }

        /// <summary>
        /// Sends the request, reads the whole response as UTF-8 and releases the connection.
        /// </summary>
        private static string ReadBody(HttpWebRequest request)
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (System.IO.StreamReader reader = new System.IO.StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }
    }
}
上一篇: 最让老师烦恼的事
下一篇: 怎么学游泳 学游泳的6个基本步骤