网页快照功能_PHP教程
// FileName: snap.class.php
// Summary: 网页快照类
// Author: millken(迷路林肯)
// LastModifed:2007-06-29
// copyright (c)2007 [email]millken@gmail.com[/email]
//====================================================
class snap{
var $dir;
var $log;
var $contents;
var $filename;
var $host;
var $name;
var $data_ts;
var $ttl;
var $url;
var $ts;
function snap(){
$this->log = "New snap() object instantiated.
n";
$this->dir = dirname(__FILE__)."/";
}
function fetch($url="",$ttl=10){
$this->log .= "--------------------------------
fetch() called
n";
$this->log .= "url: ".$url."
n";
$hosts = parse_url($url);
$this->host = $hosts['scheme'].'://'.$hosts['host'].'/';
if (!$url) {
$this->log .= "OOPS: You need to pass a URL!
";
return false;
}
$this->ttl = $ttl;
$this->url = $url;
$this->name = md5($this->url);
$this->filename = $this->dir.$this->name;
$this->log .= "Filename: ".$this->filename."
";
$this->getFile_ts();
$this->file_get_content();
}
function file_get_content(){
ob_start();
$this->ts = time() - $this->data_ts;
if($this->data_ts 0 && $this->ts ttl){
$this->log .= "cache has expired
";
@readfile($this->filename);
$this->contents = ob_get_contents();
ob_end_clean();
}else{
$this->log .= "cache hasn't expired
";
@readfile($this->url);
$this->contents = ob_get_contents();
ob_end_clean();
$this->saveToCache();
}
return true;
}
function saveToCache(){
$this->log .= "saveToCache() called
";
//create file pointer
if (!$fp=@fopen($this->filename,"w")) {
$this->log .= "Could not open ".$this->filename."
";
return false;
}
$this->contents = $this->formaturl($this->contents,$this->host);
$this->contents = preg_replace("''si","",$this->contents);
//write to file
if (!@fwrite($fp,$this->contents)) {
$this->log .= "Could not write to ".$this->filename."
";
fclose($fp);
return false;
}
//close file pointer
fclose($fp);
return true;
}
function getFile_ts(){
$this->log .= "getFile_ts() called
";
if (!file_exists($this->filename)) {
$this->data_ts = 0;
$this->log .= $this->filename." does not exist
";
return false;
}
$this->data_ts = filemtime($this->filename);
return true;
}
function formaturl($l1,$l2){
if (preg_match_all("/(]+src="([^"]+)"[^>]*>)|(]+href="([^"]+)"[^>]*>)|(]+href="([^"]+)"[^>]*>)|(]+src='([^']+)'[^>]*>)|(]+href='([^']+)'[^>]*>)/i",$l1,$regs)){
foreach($regs[0] as $num => $url){
$l1 = str_replace($url,$this->lIIIIl($url,$l2),$l1);
}
}
return $l1;
}
function lIIIIl($l1,$l2){
if(preg_match("/(.*)(href|src)=(.+?)( |/>|>).*/i",$l1,$regs)){$I2 = $regs[3];}
if(strlen($I2)>0){
$I1 = str_replace(chr(34),"",$I2);
$I1 = str_replace(chr(39),"",$I1);
}else{return $l1;}
$url_parsed = parse_url($l2);
$scheme = $url_parsed["scheme"];if($scheme!=""){$scheme = $scheme."://";}
$host = $url_parsed["host"];
$l3 = $scheme.$host;
if(strlen($l3)==0){return $l1;}
$path = dirname($url_parsed["path"]);if($path[0]==""){$path="";}
$pos = strpos($I1,"#");
if($pos>0) $I1 = substr($I1,0,$pos);
//判断类型
if(preg_match("/^(http|https|ftp):(//|\)(([w/+-~`@:%])+.)+([w/.=?+-~`@':!%#]|(&)|&)+/i",$I1)){return $l1; }//http开头的url类型要跳过
elseif($I1[0]=="/"){$I1 = $l3.$I1;}//绝对路径
elseif(substr($I1,0,3)=="../"){//相对路径
while(substr($I1,0,3)=="../"){
$I1 = substr($I1,strlen($I1)-(strlen($I1)-3),strlen($I1)-3);
if(strlen($path)>0){
$path = dirname($path);
}
}
$I1 = $l3.$path."/".$I1;
}
elseif(substr($I1,0,2)=="./"){
$I1 = $l3.$path.substr($I1,strlen($I1)-(strlen($I1)-1),strlen($I1)-1);
}
elseif(strtolower(substr($I1,0,7))=="mailto:"||strtolower(substr($I1,0,11))=="java script:"){
return $l1;
}else{
$I1 = $l3.$path."/".$I1;
}
return str_replace($I2,""$I1"",$l1);
}
}
?>
用法test.php:
require_once(dirname(__FILE__).'/snap.class.php');
$h = new snap();
$h->fetch($_GET['url']);
//echo $h->log;
echo $h->contents;
?>
上一篇: 请教各位老师关于PHP数组的问题
下一篇: sql 语句中的 NULL值