欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  php教程

php采集远程文章简单类

程序员文章站 2022-03-28 16:35:42
...
跳至
db = $db;
    }

    function geturlfile($url) {
        $url = trim($url);
        $content = '';
        if (extension_loaded('curl')) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            $content = curl_exec($ch);
            curl_close($ch);
        } else {
            $content = file_get_contents($url);
        }
        return trim($content);
    }

    function get_all_url($code) {
        preg_match_all('/"\' ]+)["|\']?\s*[^>]*>([^>]+)/is', $code, $arr);
        return array('name' => $arr[2], 'url' => $arr[1]);
    }

    function get_sub_content($str, $start, $end) {
        $start = trim($start);
        $end = trim($end);
        if ($start == '' || $end == '') {
            return $str;
        }
        $str = explode($start, $str);
        $str = explode($end, $str[1]);
        return $str[0];
    }
   
    function vd($var) {
        echo "\r\n";
        echo "\r\n";
        var_dump($var);
        echo "\r\n\r\n";
        echo "";
    }

}

?>geturlfile($url);
//定义采集列表区间
$start = '';
$end = '';
//获取区间内的文章URL和TITLE
$code = $gather->get_sub_content($html, $start, $end);
$newsAry = $gather->get_all_url($code);
//打印出结果
//$gather->vd($newsAry);
$tarGetUrl = $newsAry['url'][0];
//获取目标网址HTML
$html = $gather->geturlfile($tarGetUrl);
//定义采集列表区间
$start = '';
$end = '';
//获取区间内的文章URL和TITLE
$code = $gather->get_sub_content($html, $start, $end);
$killHtml = '';
$killHtml2 = '';
$code = str_replace($killHtml, "", $code);
$code = str_replace($killHtml2, "", $code);
$gather->vd($code);
?>