Asp.Net、asp实现的搜索引擎网址收录检查程序
程序员文章站
2024-02-22 08:28:34
使用asp.net或者asp检查某个url地址,某篇文章是否被搜索引擎,如百度,谷歌,搜狗收录。
实现原理:直接搜索你那篇文章的url地址(不带协议,带上协议也行,代码会...
使用asp.net或者asp检查某个url地址,某篇文章是否被搜索引擎,如百度,谷歌,搜狗收录。
实现原理:直接搜索你那篇文章的url地址(不带协议,带上协议也行,代码会自动去掉协议内容),如果被索引会返回搜索结果,否则会提示找不到信息。
asp.net检查百度,谷歌,搜狗搜索引擎是否收录文章网址源代码:
using System;
using System.Net;
using System.Text;
using System.IO;
using System.Web;

/// <summary>
/// Checks whether a URL has been indexed by a search engine by searching for the
/// URL itself and looking for the engine's "nothing found" phrase in the result page.
/// </summary>
public class searchengineindex
{
    // Search URL prefixes, indexed by engine number.
    public static string[] urls = {
        "http://www.baidu.com/s?ie=utf-8&wd=",      // 0: Baidu
        "https://www.google.com.hk/search?q=",      // 1: Google
        "http://www.sogou.com/web?ie=utf8&query="   // 2: Sogou
    },
    // Phrase each engine prints when the URL is not indexed (same index order as urls).
    nofindkeyword = { "抱歉,没有找到与", "找不到和您的查询", "未收录?" };

    /// <summary>
    /// Picks the text encoding to decode a response with, based on its Content-Type header.
    /// </summary>
    /// <param name="contenttype">Content-Type header value; may be null or empty.</param>
    /// <returns>Code page 936 for gb2312/gbk, code page 950 for big5, otherwise UTF-8.</returns>
    private static Encoding getencoding(string contenttype)
    {
        if (!string.IsNullOrEmpty(contenttype))
        {
            contenttype = contenttype.ToLower();
            if (contenttype.IndexOf("gb2312") != -1 || contenttype.IndexOf("gbk") != -1)
                return Encoding.GetEncoding(936);
            if (contenttype.IndexOf("big5") != -1)
                return Encoding.GetEncoding(950);
        }
        return Encoding.UTF8;
    }

    /// <summary>
    /// Downloads a page with HttpWebRequest and decodes it using the charset
    /// announced in the response's Content-Type header.
    /// </summary>
    /// <param name="url">Absolute URL to fetch.</param>
    /// <param name="adduseragent">When true, a crawler-like User-Agent is sent so the request is less likely to be blocked.</param>
    /// <returns>The decoded response body, or null when the request or the read fails.</returns>
    public static string gethtml(string url, bool adduseragent)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        if (adduseragent) request.UserAgent = "googlebot|feedfetcher-google|baiduspider";

        try
        {
            // using-blocks release the connection and the reader even when ReadToEnd throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), getencoding(response.ContentType)))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Deliberate best-effort: any failure is reported to the caller as null.
            return null;
        }
    }

    /// <summary>
    /// Checks whether a URL is indexed by the selected search engine.
    /// </summary>
    /// <param name="url">URL to look up; a leading http:// or https:// is removed before searching.</param>
    /// <param name="engin">0: Baidu, 1: Google, 2: Sogou. Any other value falls back to 0.</param>
    /// <returns>
    /// False when url is null/empty, when the result page could not be fetched, or when
    /// the page contains the engine's "nothing found" phrase; true otherwise.
    /// </returns>
    public static bool checkindex(string url, int engin)
    {
        if (string.IsNullOrEmpty(url)) return false;
        if (engin < 0 || engin > 2) engin = 0;

        string target = url.ToLower().Replace("http://", "").Replace("https://", "");
        string html = gethtml(urls[engin] + HttpUtility.UrlEncode(target), true);

        return html != null && html.IndexOf(nofindkeyword[engin]) == -1;
    }
}

// Usage example (place these calls inside your own method):
// searchengineindex.checkindex("www.jb51.net/article/20101014/2902.aspx", 0); // check the Baidu index
// searchengineindex.checkindex("www.jb51.net/article/20101014/2902.aspx", 1); // check the Google index
// searchengineindex.checkindex("www.jb51.net/article/20101014/2902.aspx", 2); // check the Sogou index
asp检查百度,谷歌,搜狗搜索引擎是否收录文章网址源代码:
<%
' Checks whether a URL is indexed by a search engine by searching for the URL
' itself and looking for the engine's "nothing found" phrase in the result page.
Class searchenginindex
    ' urls: search URL prefixes; nofindkeyword: "not indexed" phrases.
    ' Both arrays use the same index: 0 = Baidu, 1 = Google, 2 = Sogou.
    Dim urls, nofindkeyword

    Private Sub Class_Initialize
        urls = Array("http://www.baidu.com/s?ie=utf-8&wd=", "https://www.google.com.hk/search?q=", "http://www.sogou.com/web?ie=utf8&query=")
        nofindkeyword = Array("抱歉,没有找到与", "找不到和您的查询", "未收录?")
    End Sub

    ' Maps a Content-Type header value to the charset name used for decoding:
    ' "gb2312" for gb2312/gbk, "big5" for big5, otherwise "utf-8".
    Private Function getencoding(contenttype)
        Dim ct
        ct = LCase(contenttype & "")
        ' Either marker is enough; requiring both would send GBK/GB2312 pages to the utf-8 branch.
        If InStr(ct, "gb2312") <> 0 Or InStr(ct, "gbk") <> 0 Then
            getencoding = "gb2312"
        ElseIf InStr(ct, "big5") <> 0 Then
            getencoding = "big5"
        Else
            getencoding = "utf-8"
        End If
    End Function

    ' Converts binary data to a string using the given charset, via ADODB.Stream.
    Private Function bintostring(bin, encoding)
        Dim stm
        Set stm = Server.CreateObject("adodb.stream")
        stm.Type = 1 : stm.Mode = 3 : stm.Open   ' binary type, mode 3, then open
        stm.Write bin
        stm.Position = 0 : stm.Type = 2 : stm.Charset = encoding   ' rewind and re-read as text
        bintostring = stm.ReadText
        stm.Close : Set stm = Nothing
    End Function

    ' Fetches url synchronously and returns the body decoded with the charset
    ' announced in the Content-Type header. Also sets Response.Charset to that charset.
    ' Returns "" when the request or the decoding fails.
    Public Function gethtml(url)
        Dim xhr, encoding
        gethtml = ""
        Set xhr = Server.CreateObject("microsoft.xmlhttp")

        On Error Resume Next
        xhr.open "get", url, False
        xhr.send
        If Err.Number = 0 Then
            encoding = getencoding(xhr.getResponseHeader("content-type"))
            Response.Charset = encoding
            gethtml = bintostring(xhr.responseBody, encoding)
        End If
        If Err.Number <> 0 Then
            ' Best-effort: report any failure as an empty page instead of aborting the script.
            Err.Clear
            gethtml = ""
        End If
        On Error GoTo 0

        Set xhr = Nothing
    End Function

    ' Checks whether url is indexed by the selected engine.
    '   url   - address to look up; a leading http:// or https:// is removed first.
    '   engin - 0: Baidu, 1: Google, 2: Sogou; any other value falls back to 1.
    ' Returns False for an empty url, for a page that could not be fetched, or when the
    ' result page contains the engine's "nothing found" phrase; True otherwise.
    Public Function checkindex(url, engin)
        ' Work on locals: arguments are ByRef by default, so assigning to them
        ' would overwrite the caller's variables.
        Dim target, idx, html
        checkindex = False
        If Len(url) = 0 Then Exit Function

        idx = engin
        If idx < 0 Or idx > 2 Then idx = 1

        ' Case-insensitive protocol removal; the rest of the URL keeps its case.
        target = Replace(url, "http://", "", 1, -1, vbTextCompare)
        target = Replace(target, "https://", "", 1, -1, vbTextCompare)

        html = gethtml(urls(idx) & Server.URLEncode(target))
        If Len(html) = 0 Then Exit Function

        checkindex = (InStr(html, nofindkeyword(idx)) = 0)
    End Function
End Class

' Usage example
Set sei = New searchenginindex
Response.Write sei.checkindex("www.jb51.net/article/20101014/2902.aspx", 0) ' Baidu index
Response.Write sei.checkindex("www.jb51.net/article/20101014/2902.aspx", 1) ' Google index
Response.Write sei.checkindex("www.jb51.net/article/20101014/2902.aspx", 2) ' Sogou index
Set sei = Nothing
%>