一个用php实现的获取URL信息的类
程序员文章站
2023-09-20 20:08:06
获取url信息的类 使用这个类,你能获得url的如下信息: - host - path - status code ...
获取url信息的类
使用这个类,你能获得url的如下信息:
- host
- path
- status code (e.g. 404, 200, ...)
- http version
- server
- content type
- date
- the whole header string of the url
<?
/**
 * Class for getting information about a URL.
 *
 * On construction the URL is split into host, port, path and query, an
 * HTTP/1.0 GET request is sent to the host, and the status line and response
 * headers are stored so they can be read through the getters below.
 * Only plain HTTP is spoken; the scheme of the URL is not inspected.
 *
 * @author sven wagener <sven.wagener@intertribe.de>
 * @copyright intertribe limited
 * Collected and arranged by the PHP Chinese community (www.phpnet.cn)
 */
class url{
    var $url="";
    var $url_host;
    var $url_path;
    var $url_port=80;     // port taken from the URL, 80 when the URL names none
    var $url_query="";    // query string of the URL (without the leading "?")
    var $file="";
    var $code="";
    var $code_desc="";
    var $http_version=""; // variable for http version
    var $header_stream;
    var $header_array;
    var $timeout="1";

    /**
     * Constructor: stores the URL, splits it into its parts and fetches the headers.
     *
     * @param string $url the complete url
     */
    function __construct($url){
        $this->url=$url;
        $url_array=parse_url($this->url);
        if(!is_array($url_array)){
            // parse_url() returns false for seriously malformed URLs
            $url_array=array();
        }
        $this->url_host=isset($url_array['host']) ? $url_array['host'] : "";
        $this->url_path=isset($url_array['path']) ? $url_array['path'] : "";
        if($this->url_path==""){
            $this->url_path="/";
        }
        $this->url_query=isset($url_array['query']) ? $url_array['query'] : "";
        $this->url_port=isset($url_array['port']) ? (int)$url_array['port'] : 80;
        $this->refresh_headerinfo();
    }

    /**
     * Old PHP 4-style constructor name, kept as a plain method so existing
     * code that calls it explicitly keeps working. PHP 8 no longer treats a
     * method named after the class as the constructor.
     *
     * @param string $url the complete url
     */
    function url($url){
        $this->__construct($url);
    }

    /**
     * Returns the whole url.
     *
     * @return string the url as passed to the constructor
     */
    function get_url(){
        return $this->url;
    }

    /**
     * Returns the host of the url.
     *
     * @return string the host, or an empty string when the url has none
     */
    function get_url_host(){
        return $this->url_host;
    }

    /**
     * Returns the path of the url.
     *
     * @return string the path ("/" when the url has no path)
     */
    function get_url_path(){
        return $this->url_path;
    }

    /**
     * Returns the status code of the url.
     *
     * @return string|int the code from the HTTP status line, or the socket
     *                    error number when the connection failed
     */
    function get_statuscode(){
        return $this->code;
    }

    /**
     * Returns the status code description of the url.
     *
     * @return string the reason phrase of the status line, or the socket
     *                error message when the connection failed
     */
    function get_statuscode_desc(){
        return $this->code_desc;
    }

    /**
     * Returns the http version taken from the status line of the response.
     *
     * @return string the http version, e.g. "1.1"
     */
    function get_info_http_version(){
        return $this->http_version;
    }

    /**
     * Returns the "server" response header.
     *
     * @return string|null the header value, null when the header was not sent
     */
    function get_info_server(){
        return $this->header_value('server');
    }

    /**
     * Returns the "date" response header.
     *
     * @return string|null the header value, null when the header was not sent
     */
    function get_info_date(){
        return $this->header_value('date');
    }

    /**
     * Returns the "content-type" response header.
     *
     * @return string|null the header value, null when the header was not sent
     */
    function get_info_content_type(){
        return $this->header_value('content-type');
    }

    /**
     * Returns the content of the url without the headers.
     *
     * @return string the content, or an empty string when it could not be read
     */
    function get_content(){
        // get a web page into a string
        $string=file_get_contents($this->url);
        return ($string===false) ? "" : $string;
    }

    /**
     * Returns the raw response header (status line included) without content.
     *
     * @return string the header
     */
    function get_header_stream(){
        return $this->header_stream;
    }

    /**
     * Returns the response headers as an array.
     *
     * @return array header values keyed by lower-cased header name
     */
    function get_headers(){
        return $this->header_array;
    }

    /**
     * Refreshes the header information by requesting the url again.
     *
     * On connection failure the status code and description are set to the
     * socket error number and message instead.
     */
    function refresh_headerinfo(){
        $this->header_stream="";
        $this->header_array=array();

        // without a host there is nothing to connect to
        if($this->url_host==""){
            $this->code=0;
            $this->code_desc="server not found";
            return;
        }

        $errno=0;
        $errstr="";
        $fp=fsockopen($this->url_host, $this->url_port, $errno, $errstr, 30);
        if(!$fp){
            if($errno==0){
                $errstr="server not found";
            }
            $this->code=$errno;
            $this->code_desc=$errstr;
            return;
        }

        // the query string belongs to the request target, otherwise
        // "/page.php?id=3" would be requested as "/page.php"
        $request_path=$this->url_path;
        if($this->url_query!=""){
            $request_path.="?".$this->url_query;
        }
        // a non-default port has to be repeated in the Host header
        $host_header=$this->url_host;
        if($this->url_port!=80){
            $host_header.=":".$this->url_port;
        }
        $put_string="GET ".$request_path." HTTP/1.0\r\nHost: ".$host_header."\r\n\r\n";

        // set the read timeout before any data is exchanged
        stream_set_timeout($fp, (int)$this->timeout);
        fputs($fp, $put_string);

        // read whole lines up to the blank line that ends the header section
        $lines=array();
        while(!feof($fp)){
            $line=fgets($fp);
            if($line===false){
                // timeout or end of stream
                break;
            }
            if(trim($line)===""){
                break;
            }
            $lines[]=$line;
        }
        fclose($fp);

        $this->parse_header_lines($lines);
    }

    /**
     * Sets the timeout for getting header data from server.
     *
     * @param int $seconds time for timeout in seconds
     */
    function set_timeout($seconds){
        $this->timeout=$seconds;
    }

    /**
     * Fills status code, description, http version, header stream and header
     * array from the raw lines of a response header.
     *
     * @param array $lines raw header lines, status line first
     */
    private function parse_header_lines($lines){
        $this->header_stream=implode("", $lines);
        $this->header_array=array();
        $this->code="";
        $this->code_desc="";
        $this->http_version="";
        if(count($lines)==0){
            return;
        }

        // status line: "HTTP/<version> <code> <description>"
        $status_parts=explode(" ", trim($lines[0]), 3);
        if(isset($status_parts[1])){
            $this->code=$status_parts[1];
        }
        if(isset($status_parts[2])){
            $this->code_desc=$status_parts[2];
        }
        $http_array=explode("/", $status_parts[0], 2);
        if(isset($http_array[1])){
            $this->http_version=$http_array[1];
        }

        // remaining lines: "Name: value"; split on the first colon only so
        // that values containing colons (e.g. dates) stay intact
        $line_count=count($lines);
        for($i=1;$i<$line_count;$i++){
            $pair=explode(":", $lines[$i], 2);
            if(count($pair)<2){
                continue;
            }
            $this->header_array[strtolower(trim($pair[0]))]=trim($pair[1]);
        }
    }

    /**
     * Looks up one response header by its lower-case name.
     *
     * @param string $name lower-case header name
     * @return string|null the header value, null when the header was not sent
     */
    private function header_value($name){
        return isset($this->header_array[$name]) ? $this->header_array[$name] : null;
    }
}
?>
<?php
// Usage example: request a URL and print everything the url class found out.
// NOTE(review): all values below come from the remote server and are printed
// unescaped; escape them with htmlspecialchars() before using this on a real page.
require_once("url.class.php");

// the constructor already performs the request
$url=new url("http://www.phpnet.cn/");

// raw header block, the server header and the page body
echo $url->get_header_stream();
$headers=$url->get_headers();
echo isset($headers['server']) ? $headers['server'] : "";
echo $url->get_content();

// the parts of the url itself
echo "url: <b>".$url->get_url()."</b><br>\n";
echo "url host: ".$url->get_url_host()."<br>\n";
echo "url path: ".$url->get_url_path()."<br>\n<br>\n";

// what the server answered
echo "statuscode: ".$url->get_statuscode()."<br>\n";
echo "statuscode description: ".$url->get_statuscode_desc()."<br>\n";
echo "http version: ".$url->get_info_http_version()."<br>\n";
echo "server: ".$url->get_info_server()."<br>\n";
echo "content type: ".$url->get_info_content_type()."<br>\n";
echo "date: ".$url->get_info_date()."<br>\n<br>\n";
echo "whole headers:<br>\n";
echo $url->get_header_stream();
?>
使用这个类,你能获得url的如下信息:
- host
- path
- status code (e.g. 404, 200, ...)
- http version
- server
- content type
- date
- the whole header string of the url
复制代码 代码如下:
<?
/**
 * Class for getting information about a URL.
 *
 * On construction the URL is split into host, port, path and query, an
 * HTTP/1.0 GET request is sent to the host, and the status line and response
 * headers are stored so they can be read through the getters below.
 * Only plain HTTP is spoken; the scheme of the URL is not inspected.
 *
 * @author sven wagener <sven.wagener@intertribe.de>
 * @copyright intertribe limited
 * Collected and arranged by the PHP Chinese community (www.phpnet.cn)
 */
class url{
    var $url="";
    var $url_host;
    var $url_path;
    var $url_port=80;     // port taken from the URL, 80 when the URL names none
    var $url_query="";    // query string of the URL (without the leading "?")
    var $file="";
    var $code="";
    var $code_desc="";
    var $http_version=""; // variable for http version
    var $header_stream;
    var $header_array;
    var $timeout="1";

    /**
     * Constructor: stores the URL, splits it into its parts and fetches the headers.
     *
     * @param string $url the complete url
     */
    function __construct($url){
        $this->url=$url;
        $url_array=parse_url($this->url);
        if(!is_array($url_array)){
            // parse_url() returns false for seriously malformed URLs
            $url_array=array();
        }
        $this->url_host=isset($url_array['host']) ? $url_array['host'] : "";
        $this->url_path=isset($url_array['path']) ? $url_array['path'] : "";
        if($this->url_path==""){
            $this->url_path="/";
        }
        $this->url_query=isset($url_array['query']) ? $url_array['query'] : "";
        $this->url_port=isset($url_array['port']) ? (int)$url_array['port'] : 80;
        $this->refresh_headerinfo();
    }

    /**
     * Old PHP 4-style constructor name, kept as a plain method so existing
     * code that calls it explicitly keeps working. PHP 8 no longer treats a
     * method named after the class as the constructor.
     *
     * @param string $url the complete url
     */
    function url($url){
        $this->__construct($url);
    }

    /**
     * Returns the whole url.
     *
     * @return string the url as passed to the constructor
     */
    function get_url(){
        return $this->url;
    }

    /**
     * Returns the host of the url.
     *
     * @return string the host, or an empty string when the url has none
     */
    function get_url_host(){
        return $this->url_host;
    }

    /**
     * Returns the path of the url.
     *
     * @return string the path ("/" when the url has no path)
     */
    function get_url_path(){
        return $this->url_path;
    }

    /**
     * Returns the status code of the url.
     *
     * @return string|int the code from the HTTP status line, or the socket
     *                    error number when the connection failed
     */
    function get_statuscode(){
        return $this->code;
    }

    /**
     * Returns the status code description of the url.
     *
     * @return string the reason phrase of the status line, or the socket
     *                error message when the connection failed
     */
    function get_statuscode_desc(){
        return $this->code_desc;
    }

    /**
     * Returns the http version taken from the status line of the response.
     *
     * @return string the http version, e.g. "1.1"
     */
    function get_info_http_version(){
        return $this->http_version;
    }

    /**
     * Returns the "server" response header.
     *
     * @return string|null the header value, null when the header was not sent
     */
    function get_info_server(){
        return $this->header_value('server');
    }

    /**
     * Returns the "date" response header.
     *
     * @return string|null the header value, null when the header was not sent
     */
    function get_info_date(){
        return $this->header_value('date');
    }

    /**
     * Returns the "content-type" response header.
     *
     * @return string|null the header value, null when the header was not sent
     */
    function get_info_content_type(){
        return $this->header_value('content-type');
    }

    /**
     * Returns the content of the url without the headers.
     *
     * @return string the content, or an empty string when it could not be read
     */
    function get_content(){
        // get a web page into a string
        $string=file_get_contents($this->url);
        return ($string===false) ? "" : $string;
    }

    /**
     * Returns the raw response header (status line included) without content.
     *
     * @return string the header
     */
    function get_header_stream(){
        return $this->header_stream;
    }

    /**
     * Returns the response headers as an array.
     *
     * @return array header values keyed by lower-cased header name
     */
    function get_headers(){
        return $this->header_array;
    }

    /**
     * Refreshes the header information by requesting the url again.
     *
     * On connection failure the status code and description are set to the
     * socket error number and message instead.
     */
    function refresh_headerinfo(){
        $this->header_stream="";
        $this->header_array=array();

        // without a host there is nothing to connect to
        if($this->url_host==""){
            $this->code=0;
            $this->code_desc="server not found";
            return;
        }

        $errno=0;
        $errstr="";
        $fp=fsockopen($this->url_host, $this->url_port, $errno, $errstr, 30);
        if(!$fp){
            if($errno==0){
                $errstr="server not found";
            }
            $this->code=$errno;
            $this->code_desc=$errstr;
            return;
        }

        // the query string belongs to the request target, otherwise
        // "/page.php?id=3" would be requested as "/page.php"
        $request_path=$this->url_path;
        if($this->url_query!=""){
            $request_path.="?".$this->url_query;
        }
        // a non-default port has to be repeated in the Host header
        $host_header=$this->url_host;
        if($this->url_port!=80){
            $host_header.=":".$this->url_port;
        }
        $put_string="GET ".$request_path." HTTP/1.0\r\nHost: ".$host_header."\r\n\r\n";

        // set the read timeout before any data is exchanged
        stream_set_timeout($fp, (int)$this->timeout);
        fputs($fp, $put_string);

        // read whole lines up to the blank line that ends the header section
        $lines=array();
        while(!feof($fp)){
            $line=fgets($fp);
            if($line===false){
                // timeout or end of stream
                break;
            }
            if(trim($line)===""){
                break;
            }
            $lines[]=$line;
        }
        fclose($fp);

        $this->parse_header_lines($lines);
    }

    /**
     * Sets the timeout for getting header data from server.
     *
     * @param int $seconds time for timeout in seconds
     */
    function set_timeout($seconds){
        $this->timeout=$seconds;
    }

    /**
     * Fills status code, description, http version, header stream and header
     * array from the raw lines of a response header.
     *
     * @param array $lines raw header lines, status line first
     */
    private function parse_header_lines($lines){
        $this->header_stream=implode("", $lines);
        $this->header_array=array();
        $this->code="";
        $this->code_desc="";
        $this->http_version="";
        if(count($lines)==0){
            return;
        }

        // status line: "HTTP/<version> <code> <description>"
        $status_parts=explode(" ", trim($lines[0]), 3);
        if(isset($status_parts[1])){
            $this->code=$status_parts[1];
        }
        if(isset($status_parts[2])){
            $this->code_desc=$status_parts[2];
        }
        $http_array=explode("/", $status_parts[0], 2);
        if(isset($http_array[1])){
            $this->http_version=$http_array[1];
        }

        // remaining lines: "Name: value"; split on the first colon only so
        // that values containing colons (e.g. dates) stay intact
        $line_count=count($lines);
        for($i=1;$i<$line_count;$i++){
            $pair=explode(":", $lines[$i], 2);
            if(count($pair)<2){
                continue;
            }
            $this->header_array[strtolower(trim($pair[0]))]=trim($pair[1]);
        }
    }

    /**
     * Looks up one response header by its lower-case name.
     *
     * @param string $name lower-case header name
     * @return string|null the header value, null when the header was not sent
     */
    private function header_value($name){
        return isset($this->header_array[$name]) ? $this->header_array[$name] : null;
    }
}
?>
复制代码 代码如下:
<?php
// Usage example: request a URL and print everything the url class found out.
// NOTE(review): all values below come from the remote server and are printed
// unescaped; escape them with htmlspecialchars() before using this on a real page.
require_once("url.class.php");

// the constructor already performs the request
$url=new url("http://www.phpnet.cn/");

// raw header block, the server header and the page body
echo $url->get_header_stream();
$headers=$url->get_headers();
echo isset($headers['server']) ? $headers['server'] : "";
echo $url->get_content();

// the parts of the url itself
echo "url: <b>".$url->get_url()."</b><br>\n";
echo "url host: ".$url->get_url_host()."<br>\n";
echo "url path: ".$url->get_url_path()."<br>\n<br>\n";

// what the server answered
echo "statuscode: ".$url->get_statuscode()."<br>\n";
echo "statuscode description: ".$url->get_statuscode_desc()."<br>\n";
echo "http version: ".$url->get_info_http_version()."<br>\n";
echo "server: ".$url->get_info_server()."<br>\n";
echo "content type: ".$url->get_info_content_type()."<br>\n";
echo "date: ".$url->get_info_date()."<br>\n<br>\n";
echo "whole headers:<br>\n";
echo $url->get_header_stream();
?>
上一篇: DISCUZ 分页代码