C# 访问网页并对页面上的元素进行抓取
C# 访问网页并对页面上的元素进行抓取
// Fetches a web page with an HTTP GET request and collects the inner HTML of
// every <option> element found under the node selected by Xpath.
// "URL" is a placeholder: replace it with the address of the page to fetch.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("URL");

// Request headers. The values a browser sends can be copied from the page's
// F12 developer tools (Network tab -> Headers); they are left empty here.
request.Method = "GET"; // access the page with the GET method
request.Accept = "";
request.ContentType = "";
request.UserAgent = "";

// The downloaded HTML page.
String data;
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
    // The response stream is obtained exactly once; it is wrapped in a
    // GZipStream only when the server reports a gzip-compressed body.
    Stream ResStream = response.GetResponseStream();
    if (!string.IsNullOrEmpty(response.ContentEncoding)
        && response.ContentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        ResStream = new System.IO.Compression.GZipStream(ResStream, System.IO.Compression.CompressionMode.Decompress);
    }

    Encoding encoding = Encoding.GetEncoding("utf-8");
    // Disposing the reader also disposes the underlying stream(s).
    using (StreamReader streamReader = new StreamReader(ResStream, encoding))
    {
        data = streamReader.ReadToEnd();
    }
}

// Example from the original article: the room-count selector on the Ctrip home page.
var htmlDoc = new HtmlAgilityPack.HtmlDocument();
htmlDoc.LoadHtml(data); // parse the HTML

// NOTE(review): Xpath is not defined in this snippet; it is assumed to be a
// string holding the XPath of the parent element (e.g. a <select>) - confirm.
HtmlNode token = htmlDoc.DocumentNode.SelectSingleNode(Xpath);

// Collect the matching data. Both SelectSingleNode and SelectNodes return
// null (not an empty result) when nothing matches, so guard before iterating.
List<String> li = new List<string>();
var options = token == null ? null : token.SelectNodes("option");
if (options != null)
{
    foreach (HtmlNode row in options)
    {
        li.Add(row.InnerHtml);
    }
}

// Join the results, one entry per line (each entry is followed by "\r\n").
StringBuilder builder = new StringBuilder();
foreach (String item in li)
{
    builder.Append(item).Append("\r\n");
}
String aa = builder.ToString();
// aa now holds the scraped data, ready to be displayed.
本文地址:https://blog.csdn.net/m0_50559891/article/details/113685177