php实现专业获取网站SEO信息类实例
程序员文章站
2024-02-05 15:16:10
本文实例讲述了php实现专业获取网站seo信息类。分享给大家供大家参考。具体如下:
这个seo类的功能包括:
- 检查指定的网站响应
- 获取从该网站主页的语言和其他...
本文实例讲述了php实现专业获取网站seo信息类。分享给大家供大家参考。具体如下:
这个seo类的功能包括:
- 检查指定的网站响应
- 获取网站主页的语言及其他 meta 标签数据
- 从 Alexa 获取网站的流量排名和导入链接数量
- 获取谷歌统计的导入链接数量以及谷歌收录的网页数量
- 从 WOT(Web of Trust)获取网站的信任度评级
- 获取网站域名的年龄(自域名首次注册之日起计算)
- 获取 Twitter 上提及该网站页面的推文数量
- 获取网站页面在 Facebook 上的分享数量
- 获取网站的 Google PageSpeed 评分(满分 100)
- 获取网站的谷歌网页排名
<?php
/**
 * SEO report for different metrics.
 *
 * Every public method is static and queries one external service through
 * cURL (see get_html()). Methods return null / false / 0 (as documented per
 * method) when the service cannot be reached or its answer cannot be parsed.
 *
 * NOTE(review): several of the third-party endpoints used below may have been
 * retired since this class was written - confirm before relying on results.
 *
 * @category  seo
 * @author    chema <chema@garridodiaz.com>
 * @copyright (c) 2009-2012 open classifieds team
 * @license   gpl v3
 * based on seo report script http://www.phpeasycode.com && php class seostats
 */
class seoreport
{
    /**
     * Check if a url is online/alive.
     *
     * The url counts as alive when the final HTTP status is 200, 302 or 304.
     *
     * @param string $url
     * @return bool
     */
    public static function is_alive($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        // Capture the body instead of letting cURL echo it to the output.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        curl_setopt($ch, CURLOPT_FAILONERROR, 1);
        curl_exec($ch);
        $int_return_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        return in_array($int_return_code, array(200, 302, 304), true);
    }

    /**
     * HTTP GET request with cURL.
     *
     * Follows at most 2 redirects and gives up connecting after 5 seconds.
     *
     * @param string $url the url to fetch.
     * @return string|false the response body, or false when the request
     *                      failed or the body is empty.
     */
    protected static function get_html($url)
    {
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
        if (strtolower((string) parse_url($url, PHP_URL_SCHEME)) === 'https') {
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
            // 2 = verify that the certificate matches the host name
            // (the value 1 is not supported by current libcurl).
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        }
        $str = curl_exec($ch);
        curl_close($ch);

        return ($str) ? $str : false;
    }

    /**
     * Run a regular expression and return one capture group.
     *
     * @param string $pattern PCRE pattern.
     * @param mixed  $data    subject; anything that is not a string counts as "no match".
     * @param int    $group   index of the capture group to return.
     * @return string|null the captured text, or null when there is no match
     *                     or the group is empty / "0".
     */
    private static function first_match($pattern, $data, $group)
    {
        if (!is_string($data) || !preg_match($pattern, $data, $found)) {
            return null;
        }

        return !empty($found[$group]) ? $found[$group] : null;
    }

    /**
     * Get the domain from any url.
     *
     * Accepts full urls ("http://www.example.com/page") as well as bare
     * hosts ("www.example.com"). Only a leading "www." is removed.
     *
     * @param string $url
     * @return string the host without a leading "www.", or '' when the
     *                url cannot be parsed.
     */
    public static function domain_name($url)
    {
        $parts = parse_url(trim((string) $url));
        if ($parts === false) {
            return '';
        }

        if (!empty($parts['host'])) {
            $host = $parts['host'];
        } else {
            // Without a scheme parse_url() reports everything as "path";
            // the host is then the first path segment.
            $path = isset($parts['path']) ? $parts['path'] : '';
            $segments = explode('/', ltrim($path, '/'));
            $host = $segments[0];
        }

        return preg_replace('/^www\./i', '', $host);
    }

    /**
     * Get the metas from a url and the language of the site.
     *
     * @param string $url
     * @return array|false map of meta name => content, plus a 'language' key
     *                     taken from the lang attribute of <html>; false when
     *                     the page could not be fetched.
     */
    public static function meta_info($url)
    {
        $html_content = self::get_html($url);
        if (!$html_content) {
            return false;
        }

        $html = new DOMDocument();
        // Real-world markup is rarely valid; collect parser errors quietly
        // instead of emitting a PHP warning for each one.
        $previous = libxml_use_internal_errors(true);
        $html->loadHTML($html_content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $xpath = new DOMXPath($html);
        $url_info = array();

        foreach ($xpath->query('//html') as $lang) {
            $url_info['language'] = $lang->getAttribute('lang');
        }

        foreach ($xpath->query('//meta') as $meta) {
            if ($meta->getAttribute('name')) {
                $url_info[$meta->getAttribute('name')] = $meta->getAttribute('content');
            }
        }

        return $url_info;
    }

    /**
     * Alexa rank.
     *
     * @param string $url
     * @return string|null the rank digits, or null when not found.
     */
    public static function alexa_rank($url)
    {
        $domain = self::domain_name($url);
        $request = "http://data.alexa.com/data?cli=10&dat=s&url=" . urlencode($domain);
        $data = self::get_html($request);

        return self::first_match('/<popularity url="(.*?)" text="([\d]+)"\/>/si', $data, 2);
    }

    /**
     * Alexa inbound links.
     *
     * @param string $url
     * @return string|null the link count digits, or null when not found.
     */
    public static function alexa_links($url)
    {
        $domain = self::domain_name($url);
        $request = "http://data.alexa.com/data?cli=10&dat=s&url=" . urlencode($domain);
        $data = self::get_html($request);

        return self::first_match('/<linksin num="([\d]+)"\/>/si', $data, 1);
    }

    /**
     * Returns total amount of results for any google search,
     * requesting the deprecated websearch api.
     *
     * @param string $url the search query.
     * @return int|string the estimated result count, or '0' when the
     *                    response does not contain one.
     */
    public static function google_pages($url)
    {
        $request = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=1&q=' . urlencode($url);
        $data = json_decode((string) self::get_html($request));

        // NOTE(review): property names are case-sensitive; the camelCase
        // spelling below is what this API is believed to return - confirm.
        return (!isset($data->responseData->cursor->estimatedResultCount))
            ? '0'
            : intval($data->responseData->cursor->estimatedResultCount);
    }

    /**
     * Gets the inbound links from a site.
     *
     * @param string $url
     * @return string|null the count as printed by the result page
     *                     (may contain thousands separators), or null.
     */
    public static function google_links($url)
    {
        $request = "http://www.google.com/search?q=" . urlencode("link:" . $url) . "&hl=en";
        $data = self::get_html($request);

        return self::first_match('/<div id=resultstats>(about )?([\d,]+) result/si', $data, 2);
    }

    /**
     * Web of trust rating.
     *
     * @param string $url
     * @return string|null the "r" value of the first application element,
     *                     or null when missing.
     */
    public static function wot_rating($url)
    {
        $domain = self::domain_name($url);
        $request = "http://api.mywot.com/0.4/public_query2?target=" . urlencode($domain);
        $data = self::get_html($request);

        preg_match_all('/<application name="(\d+)" r="(\d+)" c="(\d+)"\/>/si', (string) $data, $regs);
        $trustworthiness = !empty($regs[2][0]) ? $regs[2][0] : null;

        return (is_numeric($trustworthiness)) ? $trustworthiness : null;
    }

    /**
     * How old is the domain?
     *
     * @param string $domain a domain name; a full url is accepted too and
     *                       reduced to its host first.
     * @return int|false unix time of the reported creation date, or false
     *                   when none was found or it could not be parsed.
     */
    public static function domain_age($domain)
    {
        $host = self::domain_name($domain);
        $request = "http://reports.internic.net/cgi/whois?whois_nic=" . urlencode($host) . "&type=domain";
        $created = self::first_match('/creation date: ([a-z0-9-]+)/si', self::get_html($request), 1);

        return ($created === null) ? false : strtotime($created);
    }

    /**
     * Counts how many tweets about the url.
     *
     * @param string $url
     * @return int|string|null the numeric 'count' field of the response, or null.
     */
    public static function tweet_count($url)
    {
        $twitterendpoint = "http://urls.api.twitter.com/1/urls/count.json?url=%s";
        $filedata = self::get_html(sprintf($twitterendpoint, urlencode($url)));
        $json = json_decode((string) $filedata, true);

        return (is_array($json) && isset($json['count']) && is_numeric($json['count']))
            ? $json['count']
            : null;
    }

    /**
     * Returns the total amount of facebook shares for a single page.
     *
     * @link https://graph.facebook.com/
     * @param string $q the url to check.
     * @return int the 'shares' field of the response, or 0 when absent.
     */
    public static function facebook_shares($q)
    {
        $str = self::get_html('http://graph.facebook.com/?id=' . urlencode($q));
        $data = json_decode((string) $str);

        return (isset($data->shares) && $data->shares != null) ? $data->shares : intval('0');
    }

    /**
     * Get the pagespeed rank over 100.
     *
     * @param string $url
     * @return int the reported score, or 0 when the response has none.
     */
    public static function page_speed($url)
    {
        $request = 'https://developers.google.com/_apps/pagespeed/run_pagespeed?url='
            . urlencode($url) . '&format=json';
        $data = json_decode((string) self::get_html($request));

        return isset($data->results->score) ? intval($data->results->score) : 0;
    }

    /**
     * Get google page rank.
     *
     * @param string $url
     * @return string|null the text following the 9-character rank marker
     *                     in the response (trimmed), or null when absent.
     */
    public static function page_rank($url)
    {
        $query = "http://toolbarqueries.google.com/tbr?client=navclient-auto&ch="
            . self::checkhash(self::hashurl($url))
            . "&features=rank&q=info:" . urlencode($url) . "&num=100&filter=0";
        $data = self::get_html($query);
        if (!is_string($data)) {
            return null;
        }

        // Case-insensitive so both "Rank_" and "rank_" are recognised.
        $pos = stripos($data, "rank_");
        if ($pos === false) {
            return null;
        }

        // Skip the marker plus its fixed-width prefix (9 characters in total).
        return trim(substr($data, $pos + 9));
    }

    // functions for google pagerank

    /**
     * Fold a string into a number: for every byte, multiply the running
     * value by $magic, wrap it below 2^32 when it reaches that bound, then
     * add the byte value.
     *
     * @param string    $str   input string.
     * @param int|float $check initial value.
     * @param int       $magic multiplier.
     * @return int|float the folded value.
     */
    public static function strtonum($str, $check, $magic)
    {
        $int32unit = 4294967296; // 2^32
        $length = strlen($str);
        for ($i = 0; $i < $length; $i++) {
            $check *= $magic;
            // If the float is beyond the boundaries of integer (usually +/- 2.15e+9 = 2^31),
            // the result of converting to integer is undefined.
            // Refer to http://www.php.net/manual/en/language.types.integer.php
            if ($check >= $int32unit) {
                $check = ($check - $int32unit * (int) ($check / $int32unit));
                // if the check is less than -2^31
                $check = ($check < -2147483648) ? ($check + $int32unit) : $check;
            }
            $check += ord($str[$i]);
        }

        return $check;
    }

    /**
     * Generate a hash for a url by mixing two strtonum() passes.
     *
     * @param string $string
     * @return int
     */
    public static function hashurl($string)
    {
        $check1 = self::strtonum($string, 0x1505, 0x21);
        $check2 = self::strtonum($string, 0, 0x1003f);

        $check1 >>= 2;
        $check1 = (($check1 >> 4) & 0x3ffffc0) | ($check1 & 0x3f);
        $check1 = (($check1 >> 4) & 0x3ffc00) | ($check1 & 0x3ff);
        $check1 = (($check1 >> 4) & 0x3c000) | ($check1 & 0x3fff);

        $t1 = (((($check1 & 0x3c0) << 4) | ($check1 & 0x3c)) << 2) | ($check2 & 0xf0f);
        $t2 = (((($check1 & 0xffffc000) << 4) | ($check1 & 0x3c00)) << 0xa) | ($check2 & 0xf0f0000);

        return ($t1 | $t2);
    }

    /**
     * Generate a checksum for the hash string.
     *
     * Walks the decimal digits from right to left, doubling every second
     * digit (and summing the digits of the doubled value), and derives a
     * check digit from the total.
     *
     * @param int $hashnum
     * @return string '7', then the check digit, then the unsigned decimal hash.
     */
    public static function checkhash($hashnum)
    {
        $checkbyte = 0;
        $flag = 0;

        $hashstr = sprintf('%u', $hashnum);
        $length = strlen($hashstr);

        for ($i = $length - 1; $i >= 0; $i--) {
            $re = (int) $hashstr[$i];
            if (1 === ($flag % 2)) {
                $re += $re;
                $re = (int) ($re / 10) + ($re % 10);
            }
            $checkbyte += $re;
            $flag++;
        }

        $checkbyte %= 10;
        if (0 !== $checkbyte) {
            $checkbyte = 10 - $checkbyte;
            if (1 === ($flag % 2)) {
                if (1 === ($checkbyte % 2)) {
                    $checkbyte += 9;
                }
                $checkbyte >>= 1;
            }
        }

        return '7' . $checkbyte . $hashstr;
    }
}
使用范例
<?php
// Usage example: collect every metric for one site and dump the result.
include 'seoreport.php';

// The report performs many sequential remote requests; allow it time to finish.
ini_set('max_execution_time', 180);

// Site to analyse: taken from the "url" query parameter, with a default.
// NOTE(review): this value is user-supplied and is fetched server-side -
// restrict the allowed hosts/schemes before exposing this script publicly.
$url = (isset($_GET['url'])) ? $_GET['url'] : 'http://phpclasses.org';

$meta_tags = seoreport::meta_info($url);

// first check if site online
if ($meta_tags !== false) {
    $stats = array();

    $stats['meta'] = $meta_tags;

    $stats['alexa']['rank'] = seoreport::alexa_rank($url);
    $stats['alexa']['links'] = seoreport::alexa_links($url);

    $stats['domain']['wot_rating'] = seoreport::wot_rating($url);
    $stats['domain']['domain_age'] = seoreport::domain_age($url);

    $stats['social']['twitter'] = seoreport::tweet_count($url);
    $stats['social']['facebook'] = seoreport::facebook_shares($url);

    $stats['google']['page_rank'] = seoreport::page_rank($url);
    $stats['google']['page_speed'] = seoreport::page_speed($url);
    $stats['google']['pages'] = seoreport::google_pages($url);
    $stats['google']['links'] = seoreport::google_links($url);

    var_dump($stats);
} else {
    // Escape the url: it comes from the request and is echoed into the page.
    echo 'site not online. ' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
}
希望本文所述对大家的php程序设计有所帮助。