Asp.Net、asp实现的搜索引擎网址收录检查程序

程序员文章站 2024-02-22 08:28:34

使用 ASP.NET 或者 ASP 检查某个 URL 地址（某篇文章）是否被搜索引擎（如百度、谷歌、搜狗）收录。实现原理：直接搜索你那篇文章的 URL 地址（可以不带协议，带上协议也行，代码会...

使用 ASP.NET 或者 ASP 检查某个 URL 地址（某篇文章）是否被搜索引擎（如百度、谷歌、搜狗）收录。

实现原理：直接搜索你那篇文章的 URL 地址（可以不带协议，带上协议也行，代码会自动去掉协议部分）。如果该地址已被索引，搜索引擎会返回搜索结果；否则结果页会提示找不到相关信息。

asp.net检查百度，谷歌，搜狗搜索引擎是否收录文章网址源代码：

using System;
using System.IO;
using System.Net;
using System.Text;
using System.Web;
/// <summary>
/// Checks whether a URL has been indexed by Baidu, Google or Sogou by searching
/// for the URL itself and looking for the engine's "nothing found" text in the
/// returned result page.
/// </summary>
public class searchengineindex
{
  /// <summary>
  /// Search URL prefixes, indexed by engine id (0: Baidu, 1: Google, 2: Sogou).
  /// The URL-encoded address being checked is appended to the selected prefix.
  /// </summary>
  public static string[] urls = {
      "http://www.baidu.com/s?ie=utf-8&wd=",    // Baidu
      "https://www.google.com.hk/search?q=",    // Google
      "http://www.sogou.com/web?ie=utf8&query=" // Sogou
    };

  /// <summary>
  /// Text searched for in each engine's result page, in the same order as
  /// <see cref="urls"/>. Finding it is treated as "URL not indexed".
  /// </summary>
  public static string[] nofindkeyword = { "抱歉，没有找到与", "找不到和您的查询", "未收录？" };

  /// <summary>
  /// Picks the text encoding to decode a response with, based on its Content-Type header.
  /// </summary>
  /// <param name="contenttype">Content-Type header value; may be null or empty.</param>
  /// <returns>Code page 936 for gb2312/gbk, code page 950 for big5, otherwise UTF-8.</returns>
  private static Encoding getencoding(string contenttype)
  {
    if (!string.IsNullOrEmpty(contenttype))
    {
      // Ordinal, case-insensitive matching: a culture-sensitive ToLower() would
      // turn "BIG5" into "bıg5" under a Turkish locale and miss the charset.
      if (contenttype.IndexOf("gb2312", StringComparison.OrdinalIgnoreCase) >= 0
          || contenttype.IndexOf("gbk", StringComparison.OrdinalIgnoreCase) >= 0)
        return Encoding.GetEncoding(936);
      if (contenttype.IndexOf("big5", StringComparison.OrdinalIgnoreCase) >= 0)
        return Encoding.GetEncoding(950);
    }
    return Encoding.UTF8;
  }

  /// <summary>
  /// Removes one leading "http://" or "https://" (any letter case) from an address.
  /// </summary>
  /// <param name="url">Address to clean; must not be null.</param>
  /// <returns>The address without its scheme prefix, or the input unchanged.</returns>
  private static string stripscheme(string url)
  {
    foreach (string scheme in new[] { "http://", "https://" })
    {
      // Only the prefix is removed; a scheme inside the path or query string
      // is part of the address and must survive.
      if (url.StartsWith(scheme, StringComparison.OrdinalIgnoreCase))
        return url.Substring(scheme.Length);
    }
    return url;
  }

  /// <summary>
  /// Downloads a page with HttpWebRequest and decodes it using the charset
  /// announced in the response's Content-Type header.
  /// </summary>
  /// <param name="url">Absolute address to fetch.</param>
  /// <param name="adduseragent">When true, a crawler-like User-Agent header is sent.</param>
  /// <returns>The decoded response body, or null when the request or decoding fails.</returns>
  public static string gethtml(string url, bool adduseragent)
  {
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    if (adduseragent) request.UserAgent = "googlebot|feedfetcher-google|baiduspider";
    try
    {
      // using-blocks release the connection and reader even when reading throws.
      using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
      using (Stream body = response.GetResponseStream())
      using (StreamReader reader = new StreamReader(body, getencoding(response.ContentType)))
      {
        return reader.ReadToEnd();
      }
    }
    catch (Exception)
    {
      // Deliberately best-effort: any failure is reported to the caller as null.
      return null;
    }
  }

  /// <summary>
  /// Checks whether a URL is indexed by the selected search engine.
  /// </summary>
  /// <param name="url">Address to look up, with or without an http/https prefix.</param>
  /// <param name="engin">0: Baidu, 1: Google, 2: Sogou. Any other value falls back to 0.</param>
  /// <returns>
  /// True when a result page was fetched and does not contain the engine's
  /// "nothing found" text. False for an empty address, a failed fetch, or a
  /// page containing that text.
  /// </returns>
  public static bool checkindex(string url, int engin)
  {
    if (string.IsNullOrEmpty(url)) return false;
    if (engin < 0 || engin >= urls.Length || engin >= nofindkeyword.Length) engin = 0;

    // The address is searched in lower case without its scheme.
    string target = stripscheme(url.Trim()).ToLowerInvariant();
    if (target.Length == 0) return false; // nothing left to search for

    string html = gethtml(urls[engin] + HttpUtility.UrlEncode(target), true);
    return html != null && html.IndexOf(nofindkeyword[engin], StringComparison.Ordinal) == -1;
  }
}



//Usage example: run the same article address through every supported engine.

    const string article = "www.jb51.net/article/20101014/2902.aspx";
    for (int engine = 0; engine <= 2; engine++) //0: Baidu index, 1: Google index, 2: Sogou index
        searchengineindex.checkindex(article, engine);

asp检查百度，谷歌，搜狗搜索引擎是否收录文章网址源代码：

<%
'Checks whether a URL has been indexed by Baidu, Google or Sogou by searching
'for the URL itself and looking for the engine's "nothing found" text.
class searchenginindex
 'urls: search URL prefixes by engine id (0 Baidu, 1 Google, 2 Sogou)
 'nofindkeyword: text whose presence in the result page means "not indexed"
 dim urls,nofindkeyword
 private sub class_initialize
  'Baidu, Google and Sogou query addresses; the encoded URL is appended
  urls=array("http://www.baidu.com/s?ie=utf-8&wd=","https://www.google.com.hk/search?q=","http://www.sogou.com/web?ie=utf8&query=")
  'text shown by each engine when the searched URL is not indexed
  nofindkeyword=array("抱歉，没有找到与", "找不到和您的查询", "未收录？")
 end sub
 'Maps a Content-Type header value to the charset name used for decoding.
 'Returns "gb2312" for gb2312/gbk, "big5" for big5, otherwise "utf-8".
 private function getencoding(byval contenttype)
  contenttype=lcase(contenttype&"") '&"" turns a Null/Empty header into ""
  'either name selects the Chinese simplified charset (was "and", which never matched)
  if instr(contenttype,"gb2312")<>0 or instr(contenttype,"gbk")<>0 then
   getencoding="gb2312"
  elseif instr(contenttype,"big5")<>0 then
   getencoding="big5"
  else
   getencoding="utf-8"
  end if
 end function
 'Converts binary response data to text using the given charset name.
 private function bintostring(byval bin,byval encoding)
  dim obj
  set obj=server.createobject("adodb.stream")
  obj.type=1:obj.mode=3:obj.open 'type 1 = binary, mode 3 = read/write
  obj.write bin
  obj.position=0:obj.type=2:obj.charset=encoding 'rewind, then reread as text
  bintostring=obj.readtext
  obj.close:set obj=nothing
 end function
 'Removes one leading "http://" or "https://" (any letter case) from an address.
 private function stripscheme(byval address)
  dim scheme
  for each scheme in array("http://","https://")
   if strcomp(left(address,len(scheme)),scheme,vbtextcompare)=0 then
    address=mid(address,len(scheme)+1)
    exit for
   end if
  next
  stripscheme=address
 end function
 'Downloads url and returns the body decoded with the charset announced in
 'the Content-Type response header. Request errors are raised to the caller.
 public function gethtml(byval url)
  dim xhr,encoding 'encoding was undeclared before and leaked as a page-level global
  'ServerXMLHTTP is the MSXML component intended for server-side requests
  set xhr=server.createobject("msxml2.serverxmlhttp")
  xhr.open "GET",url,false
  xhr.send
  encoding=getencoding(xhr.getresponseheader("content-type"))
  'NOTE(review): this also switches the charset of the current ASP response
  'to that of the fetched page - kept as in the original, confirm it is wanted
  response.charset=encoding
  gethtml=bintostring(xhr.responsebody,encoding)
  set xhr=nothing
 end function
 'Returns True when the engine's result page for url does not contain the
 '"nothing found" text. Returns False for an empty address or a failed fetch.
 'engin: 0 Baidu, 1 Google, 2 Sogou; any other value falls back to 0.
 'Arguments are taken ByVal so the caller's variables are never overwritten.
 public function checkindex(byval url,byval engin)
  dim html
  checkindex=false
  url=stripscheme(trim(url&""))
  if len(url)=0 then exit function
  if engin<0 or engin>ubound(urls) then engin=0
  on error resume next 'a failed request means "not indexed", not a page error
  html=gethtml(urls(engin)&server.urlencode(url))
  if err.number<>0 then
   err.clear
   html=""
  end if
  on error goto 0
  if len(html)=0 then exit function
  checkindex=instr(html,nofindkeyword(engin))=0
 end function
end class
dim checker,engineid
set checker=new searchenginindex
'engine ids: 0 = Baidu index, 1 = Google index, 2 = Sogou index
for engineid=0 to 2
 response.write checker.checkindex("www.jb51.net/article/20101014/2902.aspx",engineid)
next
set checker=nothing
 %>

上一篇： ClassLoader Test 博客分类： java语言相关 JavaIDEACC++C#

下一篇： java基于TCP协议实现聊天程序