欢迎您访问程序员文章站!本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页  >  php教程

php获取远程网页源码的程序代码

程序员文章站 2024-04-04 22:51:11
...
有时我们做采集时需要把远程网页的源码下载到本地,在这里我们整理了一些php获取远程网页源码的代码,希望对各位有所帮助。

php的curl函数

基本例子

php fopen函数

<?php
// NOTE(review): this listing was recovered from a copy in which the "<?php"
// opener, the HTML tags inside the print() strings and the backslashes of the
// "\n" escapes had been stripped. The <H1>/<HR>/<BR> markup below is a
// best-effort reconstruction -- confirm against the original article.

/**
 * Print every remaining line of an open stream with HTML/PHP tags removed.
 *
 * fgetss() was deprecated in PHP 7.3 and removed in PHP 8.0, so each line is
 * read with fgets() and filtered through strip_tags() instead. Whole lines are
 * read (no 255-byte cap) so a tag is never cut in half before being stripped.
 * Unlike fgetss(), a tag that spans several lines is not tracked.
 *
 * @param resource $handle readable stream returned by fopen()
 * @param string   $suffix text printed after each line
 */
function print_stripped_lines($handle, $suffix) {
    // Loop on the fgets() result rather than feof(): the feof() form prints
    // one extra, empty line when the last read hits end-of-file.
    while (($line = fgets($handle)) !== false) {
        print (strip_tags($line) . $suffix);
    }
}

print ("<H1>HTTP</H1>\n");
// open a file using http protocol
if (!($myFile = fopen("http://www.phprm.com/", "r"))) {
    print ("file could not be opened");
    exit;
}
// read the file line by line
print_stripped_lines($myFile, " <BR>\n");
// close the file
fclose($myFile);

print ("<H1>FTP</H1>\n");
print ("<HR>\n");
// open a file using ftp protocol
if (!($myFile = fopen("ftp://ftp.php.net/welcome.msg", "r"))) {
    print ("file could not be opened");
    exit;
}
// read the file line by line
print_stripped_lines($myFile, "<BR>\n");
// close the file
fclose($myFile);

print ("<H1>Local</H1>\n");
print ("<HR>\n");
// open a local file
if (!($myFile = fopen("data.txt", "r"))) {
    print ("file could not be opened");
    exit;
}
// read the file line by line
print_stripped_lines($myFile, "<BR>\n");
// close the file
fclose($myFile);
?>

file_get_contents函数

抓取远程网页源码类

_url;
        $pos = strpos($req, '://');
        $this->_protocol = strtolower(substr($req, 0, $pos));
        $req = substr($req, $pos + 3);
        $pos = strpos($req, '/');
        if ($pos === false) $pos = strlen($req);
        $host = substr($req, 0, $pos);
        if (strpos($host, ':') !== false) {
            list($this->_host, $this->_port) = explode(':', $host);
        } else {
            $this->_host = $host;
            $this->_port = ($this->_protocol == 'https') ? 443 : 80;
        }
        $this->_uri = substr($req, $pos);
        if ($this->_uri == '') $this->_uri = '/';
    }
    // constructor
    /**
     * Store the URL to fetch and have _scan_url() parse it.
     *
     * Declared as __construct because, from PHP 8.0 on, a method that merely
     * shares the class name is no longer run by `new`.
     *
     * @param string $url URL to request
     */
    function __construct($url) {
        $this->_url = $url;
        $this->_scan_url();
    }
    /**
     * Old-style constructor name, kept so code that calls HTTPRequest()
     * explicitly keeps working; it simply delegates to __construct().
     *
     * @param string $url URL to request
     */
    function HTTPRequest($url) {
        $this->__construct($url);
    }
    // download URL to string
    /**
     * Fetch the URL with a raw HTTP/1.0 GET request over a socket.
     *
     * The response is split at the first blank line into headers and body.
     * When a Location header is present, a new HTTPRequest is created for it
     * and its result is returned instead, up to $maxRedirects hops.
     *
     * @param int $maxRedirects how many Location redirects may still be
     *                          followed; any non-integer value (older code
     *                          passed an object here) falls back to 5
     * @return string|false the response body; the whole raw response when no
     *                      header/body separator is found; false when the
     *                      connection could not be opened
     */
    function DownloadToString($maxRedirects = 5) {
        if (!is_int($maxRedirects)) {
            $maxRedirects = 5;
        }
        // HTTP line terminator: carriage return + line feed
        $crlf = "\r\n";
        // generate request
        $req = 'GET ' . $this->_uri . ' HTTP/1.0' . $crlf . 'Host: ' . $this->_host . $crlf . $crlf;
        // fetch; https goes through the ssl:// transport
        $target = ($this->_protocol == 'https' ? 'ssl://' : '') . $this->_host;
        // the port may be a string when it was taken from the URL
        $this->_fp = fsockopen($target, (int) $this->_port, $errno, $errstr);
        if (!$this->_fp) {
            // connection failed: nothing to write to or read from
            return false;
        }
        fwrite($this->_fp, $req);
        $response = '';
        while (!feof($this->_fp)) {
            $chunk = fread($this->_fp, 1024);
            if ($chunk === false) {
                break;
            }
            $response .= $chunk;
        }
        fclose($this->_fp);
        // split header and body
        $pos = strpos($response, $crlf . $crlf);
        if ($pos === false) {
            return $response;
        }
        $header = substr($response, 0, $pos);
        $body = substr($response, $pos + 2 * strlen($crlf));
        // parse headers into a map keyed by lower-cased header name
        $headers = array();
        foreach (explode($crlf, $header) as $line) {
            $colon = strpos($line, ':');
            if ($colon !== false) {
                $headers[strtolower(trim(substr($line, 0, $colon)))] = trim(substr($line, $colon + 1));
            }
        }
        // redirection? follow it only while the hop budget lasts, so a
        // redirect loop cannot recurse forever
        if (isset($headers['location']) && $maxRedirects > 0) {
            $http = new HTTPRequest($headers['location']);
            return $http->DownloadToString($maxRedirects - 1);
        }
        return $body;
    }
}
// Usage: create a request for http://www.phprm.com and keep whatever
// DownloadToString() returns in $str
$r = new HTTPRequest('http://www.phprm.com');
$str = $r->DownloadToString();
?>