欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

php实现专业获取网站SEO信息类实例

程序员文章站 2024-02-05 15:16:10
本文实例讲述了php实现专业获取网站seo信息类。分享给大家供大家参考。具体如下: 这个seo类的功能包括: - 检查指定的网站响应 - 获取从该网站主页的语言和其他...

本文实例讲述了php实现专业获取网站seo信息类。分享给大家供大家参考。具体如下:

这个seo类的功能包括:
- 检查指定网站是否可以正常响应
- 获取网站首页的语言及其他meta标签数据
- 从alexa获取网站的流量排名和导入链接数
- 获取谷歌收录的网页数量以及谷歌统计的导入链接数
- 从wot获取网站的信任度评级
- 获取域名年龄(自域名首次注册之日起计算)
- 获取twitter上提及该网站页面的推文数量
- 获取该网站页面在facebook上的分享数
- 获取网站的谷歌pagespeed评分
- 获取网站的谷歌pagerank

<?php
/**
 *
 * seo report for different metrics
 *
 * @category seo
 * @author chema <chema@garridodiaz.com>
 * @copyright (c) 2009-2012 open classifieds team
 * @license gpl v3
 * based on seo report script http://www.phpeasycode.com && php class seostats
 *
 */
class seoreport{
  /**
   * Check whether a url is online/alive.
   *
   * Performs a GET request and treats the HTTP status codes 200, 302 and 304
   * as "alive". Any other status (or a transport failure, which yields 0)
   * is reported as not alive.
   *
   * @param string $url
   * @return bool
   */
  public static function is_alive($url)
  {
    $ch = curl_init();
    if ($ch === false)
    {
      return false;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    // capture the body instead of letting curl echo it into the page output
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // same connect timeout as get_html() so a dead host cannot hang the report
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    curl_exec($ch);
    $int_return_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    return in_array($int_return_code, array(200, 302, 304), true);
  }
  /**
   * http get request with curl.
   *
   * Follows at most two redirects and gives up connecting after 5 seconds.
   * For https urls the peer certificate and host name are verified.
   *
   * @param string $url string, containing the url to curl.
   * @return string|false the response body, or false when the request
   *                      failed or the body was empty.
   */
  protected static function get_html($url)
  {
    $ch = curl_init($url);
    if ($ch === false)
    {
      return false;
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
    if (strtolower((string) parse_url($url, PHP_URL_SCHEME)) == 'https')
    {
      curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
      // 2 = verify that the certificate matches the host name;
      // the value 1 is no longer accepted by current libcurl
      curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    }
    $str = curl_exec($ch);
    curl_close($ch);
    return ($str) ? $str : false;
  }
  /**
   * Get the domain from any url.
   *
   * A leading "www." host label is dropped. When the input has no scheme
   * (e.g. "www.example.com") parse_url() reports it as a path, so the path
   * is returned instead of the host.
   *
   * @param string $url
   * @return string|null the domain, or null when the url cannot be parsed
   */
  public static function domain_name($url)
  {
    // strip only a leading "www." (after an optional scheme), not every
    // occurrence of "www." inside the url
    $nowww = preg_replace('#^([a-z][a-z0-9+.\-]*://)?www\.#i', '$1', (string) $url);
    $domain = parse_url($nowww);
    if (!is_array($domain))
      return null;
    if (!empty($domain["host"]))
      return $domain["host"];
    return isset($domain["path"]) ? $domain["path"] : null;
  }
  /**
   * Get the metas from a url and the language of the site.
   *
   * The returned array is keyed by the meta tag's "name" attribute and holds
   * its "content" attribute; the "language" key holds the lang attribute of
   * the html element when one is present.
   *
   * @param string $url
   * @return array|false false when the page could not be downloaded
   */
  public static function meta_info($url)
  {
    if (!$html_content = self::get_html($url))
        return false;
    // real-world markup is rarely valid; collect libxml errors silently
    // instead of emitting one warning per parse problem
    $previous = libxml_use_internal_errors(true);
    $html = new DOMDocument();
    $html->loadHTML($html_content);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $xpath = new DOMXPath( $html );
    $url_info = array();
    $langs = $xpath->query( '//html' );
    foreach ($langs as $lang)
    {
      $url_info['language'] = $lang->getAttribute('lang');
    }
    $metas = $xpath->query( '//meta' );
    foreach ($metas as $meta)
    {
      if ($meta->getAttribute('name'))
      {
        $url_info[$meta->getAttribute('name')] = $meta->getAttribute('content');
      }
    }
    return $url_info;
  }
  /**
   * alexa rank
   *
   * @param string $url
   * @return string|null the rank as captured from the response, or null
   *                     when it is not present
   */
  public static function alexa_rank($url)
  {
    $domain   = self::domain_name($url);
    $request   = "http://data.alexa.com/data?cli=10&dat=s&url=" . $domain;
    $data     = self::get_html($request);
    preg_match('/<popularity url="(.*?)" text="([\d]+)"\/>/si', (string) $data, $p);
    // the rank is the second capture group of the match stored in $p
    return (!empty($p[2])) ? $p[2] : null;
  }
  /**
   * alexa inbounds link
   *
   * @param string $url
   * @return string|null the link count as captured from the response, or
   *                     null when it is not present
   */
  public static function alexa_links($url)
  {
    $domain   = self::domain_name($url);
    $request   = "http://data.alexa.com/data?cli=10&dat=s&url=" . $domain;
    $data     = self::get_html($request);
    preg_match('/<linksin num="([\d]+)"\/>/si', (string) $data, $l);
    return (!empty($l[1])) ? $l[1] : null;
  }
  /**
   * returns total amount of results for any google search,
   * requesting the deprecated websearch api.
   *
   * @param    string    $url   string, containing the search query.
   * @return   integer|string   the estimated result count, or the string
   *                            '0' when the response carries no count.
   */
  public static function google_pages($url)
  {
    // the query must be url-encoded, otherwise "&" or "?" inside it would be
    // read as part of the api request itself
    $request = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=1&q='.urlencode($url);
    $str = self::get_html($request);
    $data = json_decode((string) $str);
    // json property names are case-sensitive
    return (!isset($data->responseData->cursor->estimatedResultCount))
        ? '0'
        : intval($data->responseData->cursor->estimatedResultCount);
  }
  /**
   * gets the inbounds links from a site
   *
   * @param string $url
   * @return string|null the count as printed by google (may contain
   *                     thousands separators), or null when not found
   */
  public static function google_links($url)
  {
    $request   = "http://www.google.com/search?q=" . urlencode("link:" . $url) . "&hl=en";
    $data     = self::get_html($request);
    preg_match('/<div id=resultstats>(about )?([\d,]+) result/si', (string) $data, $l);
    return (!empty($l[2])) ? $l[2] : null;
  }
  /**
   * web of trust rating
   *
   * @param string $url
   * @return string|null the "r" attribute of the first application element
   *                     in the response, or null when it is missing
   */
  public static function wot_rating($url)
  {
    $domain = self::domain_name($url);
    $request = "http://api.mywot.com/0.4/public_query2?target=" . $domain;
    $data   = self::get_html($request);
    preg_match_all('/<application name="(\d+)" r="(\d+)" c="(\d+)"\/>/si', (string) $data, $regs);
    $trustworthiness = (!empty($regs[2][0])) ? $regs[2][0] : null;
    return (is_numeric($trustworthiness))? $trustworthiness:null;
  }

  /**
   * how old is the domain?
   *
   * @param string $domain a domain name; a full url is accepted as well and
   *                       is reduced to its domain first
   * @return integer|false unixtime of the creation date, or false when the
   *                       whois response carries no parsable creation date
   */
  public static function domain_age($domain)
  {
    // callers may hand in a full url; whois expects the bare domain
    $domain = self::domain_name($domain);
    $request = "http://reports.internic.net/cgi/whois?whois_nic=" . $domain . "&type=domain";
    $data   = self::get_html($request);
    preg_match('/creation date: ([a-z0-9-]+)/si', (string) $data, $p);
    return (empty($p[1])) ? false : strtotime($p[1]);
  }
  /**
   * counts how many tweets about the url
   *
   * @param string $url
   * @return integer|string|null the count, or null when the response does
   *                             not carry a numeric count
   */
  public static function tweet_count($url)
  {
    $url = urlencode($url);
    $twitterendpoint = "http://urls.api.twitter.com/1/urls/count.json?url=%s";
    // use the shared curl helper like every other method; it has a timeout
    // and does not depend on allow_url_fopen
    $filedata = self::get_html(sprintf($twitterendpoint, $url));
    $json = json_decode((string) $filedata, true);
    unset($filedata);        // free memory
    return (is_array($json) && isset($json['count']) && is_numeric($json['count']))
        ? $json['count']
        : null;
  }
  /**
   * returns the total amount of facebook shares for a single page
   *
   * @link     https://graph.facebook.com/
   * @param     string   $q the url to check.
   * @return    integer  returns the total amount of facebook shares,
   *                     0 when the response carries none
   */
  public static function facebook_shares($q)
  {
    //execution and result of json
    $str = self::get_html('http://graph.facebook.com/?id='.urlencode($q));
    $data = json_decode((string) $str);
    //return only number of facebook shares; a failed request decodes to
    //null, so the property must be checked before it is read
    return (isset($data->shares) && $data->shares != null) ? $data->shares : 0;
  }
  /**
   * get the pagespeed rank over 100
   *
   * @param string $url
   * @return integer the score, 0 when the response carries none
   */
  public static function page_speed($url)
  {
    $request = 'https://developers.google.com/_apps/pagespeed/run_pagespeed?url='.urlencode($url).'&format=json';
    $str = self::get_html($request);
    $data = json_decode((string) $str);
    return isset($data->results->score) ? intval($data->results->score) : 0;
  }
  /**
   * get google page rank
   *
   * @param string $url
   * @return string|null the rank taken from the toolbar response, or null
   *                     when the response contains no rank marker
   */
  public static function page_rank($url)
  {
    $query = "http://toolbarqueries.google.com/tbr?client=navclient-auto&ch=".self::checkhash(self::hashurl($url)). "&features=rank&q=info:".urlencode($url)."&num=100&filter=0";
    $data = (string) self::get_html($query);
    // match the marker case-insensitively; strpos() would miss "Rank_"
    $pos  = stripos($data, "rank_");
    if ($pos === false)
    {
      return null;
    }
    // skip the 9 characters that precede the rank value and drop the
    // trailing line break
    return trim(substr($data, $pos + 9));
  }
  // functions for google pagerank
  /**
   * Fold a string into a number: for every byte, multiply the running value
   * by $magic, wrap it at 2^32 and add the byte value.
   *
   * @param string $str
   * @param integer $check start value
   * @param integer $magic multiplier
   * @return integer|float
   */
  public static function strtonum($str, $check, $magic)
  {
    $int32unit = 4294967296; // 2^32
    $length = strlen($str);
    for ($i = 0; $i < $length; $i++) {
      $check *= $magic;
      //if the float is beyond the boundaries of integer (usually +/- 2.15e+9 = 2^31),
      // the result of converting to integer is undefined
      // refer to http://www.php.net/manual/en/language.types.integer.php
      if ($check >= $int32unit) {
        $check = ($check - $int32unit * (int) ($check / $int32unit));
        //if the check less than -2^31
        $check = ($check < -2147483648) ? ($check + $int32unit) : $check;
      }
      // square-bracket offset: the curly-brace form no longer parses on PHP 8
      $check += ord($str[$i]);
    }
    return $check;
  }
  /**
   * generate a hash for a url
   *
   * @param string $string
   * @return integer
   */
  public static function hashurl($string)
  {
    $check1 = self::strtonum($string, 0x1505, 0x21);
    $check2 = self::strtonum($string, 0, 0x1003f);
    $check1 >>= 2;
    $check1 = (($check1 >> 4) & 0x3ffffc0 ) | ($check1 & 0x3f);
    $check1 = (($check1 >> 4) & 0x3ffc00 ) | ($check1 & 0x3ff);
    $check1 = (($check1 >> 4) & 0x3c000 ) | ($check1 & 0x3fff);
    $t1 = (((($check1 & 0x3c0) << 4) | ($check1 & 0x3c)) <<2 ) | ($check2 & 0xf0f );
    $t2 = (((($check1 & 0xffffc000) << 4) | ($check1 & 0x3c00)) << 0xa) | ($check2 & 0xf0f0000 );
    return ($t1 | $t2);
  }
  /**
   * generate a checksum for the hash string
   *
   * Walks the decimal digits of the hash from right to left, doubling every
   * second digit (and summing the digits of the doubled value), and prefixes
   * the hash with '7' and the resulting check digit.
   *
   * @param integer $hashnum
   * @return string
   */
  public static function checkhash($hashnum)
  {
    $checkbyte = 0;
    $flag = 0;
    $hashstr = sprintf('%u', $hashnum) ;
    $length = strlen($hashstr);
    for ($i = $length - 1; $i >= 0; $i --) {
      // square-bracket offset: the curly-brace form no longer parses on PHP 8
      $re = (int) $hashstr[$i];
      if (1 === ($flag % 2)) {
        $re += $re;
        $re = (int)($re / 10) + ($re % 10);
      }
      $checkbyte += $re;
      $flag ++;
    }
    $checkbyte %= 10;
    if (0 !== $checkbyte) {
      $checkbyte = 10 - $checkbyte;
      if (1 === ($flag % 2) ) {
        if (1 === ($checkbyte % 2)) {
          $checkbyte += 9;
        }
        $checkbyte >>= 1;
      }
    }
    return '7'.$checkbyte.$hashstr;
  }
}

使用范例

<?php
// Usage example: builds an SEO report for the url given in the "url" query
// parameter (falling back to a default site) and dumps it.
include 'seoreport.php';
// the report performs many sequential remote requests, so allow it more time
ini_set('max_execution_time', 180);
  // NOTE(review): the url comes straight from the request and is fetched
  // server-side; restrict the allowed hosts/schemes before exposing this
  // script publicly.
  // superglobals are case-sensitive: it is $_GET, not $_get
  $url = (isset($_GET['url']))?$_GET['url']:'http://phpclasses.org';
  $meta_tags = seoreport::meta_info($url);
  //die(var_dump($meta_tags));
  //first check if site online
  if ($meta_tags!==false)
  {
    $stats = array();
    $stats['meta'] = $meta_tags;
    $stats['alexa']['rank'] = seoreport::alexa_rank($url);
    $stats['alexa']['links'] = seoreport::alexa_links($url);
    $stats['domain']['wot_rating'] = seoreport::wot_rating($url);
    $stats['domain']['domain_age'] = seoreport::domain_age($url);
    $stats['social']['twitter'] = seoreport::tweet_count($url);
    $stats['social']['facebook'] = seoreport::facebook_shares($url);
    $stats['google']['page_rank'] = seoreport::page_rank($url);
    $stats['google']['page_speed'] = seoreport::page_speed($url);
    $stats['google']['pages'] = seoreport::google_pages($url);
    $stats['google']['links'] = seoreport::google_links($url);
    var_dump($stats);
  }
  else
  {
    // the message has to be echoed to be shown; the url is request input,
    // so escape it for the html context
    echo 'site not online. '.htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
  }

希望本文所述对大家的php程序设计有所帮助。