利用 PHP 批量抓取关键词对应的百度推广广告网址并保存到 txt 文件中
程序员文章站
2022-05-24 17:45:23
...
结合服务器的定时任务,可以定期查看各关键词广告的竞争程度。
php代码
<?php
/**
 * Batch ad-URL collector.
 *
 * For every keyword in a fixed list this script requests the Baidu search
 * result page, pulls out the ad blocks it recognises (two kinds in the left
 * column, one kind in the right column), extracts the display URL of each ad
 * and both appends it to semallurl.txt and echoes it as HTML.
 *
 * The script is meant to be run repeatedly (the log file is opened in append
 * mode and a timestamp line is written at the end of every run).
 */

/* Output log; "a+" keeps the results of previous runs. */
$fp = fopen("semallurl.txt", "a+");
if ($fp === false) {
    /* The log file is the product of this script; stop here instead of
       handing false to every fwrite() call below. */
    exit("Cannot open semallurl.txt for appending<br>");
}

$kws1 = "上海酒店,北京酒店,广州酒店,天津酒店,广州酒店";
/* The list above repeats one keyword; de-duplicate so each keyword is
   fetched and logged only once per run. */
$kws = array_unique(explode(",", $kws1));

foreach ($kws as $kw) {
    $pageURL = "http://www.baidu.com/s?word=" . urlencode($kw);

    /* Fetch the result page. fetch() returns false when the transfer fails. */
    $contents = fetch($pageURL);
    if (!is_string($contents)) {
        echo "fetch failed for " . $kw . "<br>";
        $contents = "";
    }

    /* Remove JavaScript. The "s" modifier lets "." cross line breaks so
       multi-line <script> bodies are removed too; "i" covers <SCRIPT>. */
    $withoutScripts = preg_replace('/<script[^>]*?>.*?<\/script>/is', "", $contents);
    if ($withoutScripts !== null) {
        /* preg_replace() yields null on a PCRE error; in that case keep the
           unfiltered page rather than losing it entirely. */
        $contents = $withoutScripts;
    }

    /* The page is cut at every '<div id="content_' marker: the first piece
       after a marker is treated as the right column and the second as the
       left column. Missing pieces (failed fetch, changed layout) become
       empty strings so the patterns below simply find nothing. */
    $sections = explode('<div id="content_', $contents);
    $contents_right = isset($sections[1]) ? '<div id="content_' . $sections[1] : "";
    $contents_left = isset($sections[2]) ? '<div id="content_' . $sections[2] : "";

    /* Locate the ad blocks of each kind. */
    $ads_left_white = findAdBlocks('/(<div id=\"[0-9]*\" class=\"ec_pp_f ec_pp_top.*?)<a href=\"http:\/\/e\.baidu\.com\//', $contents_left);
    $ads_left_green = findAdBlocks('/(<table class=\"EC_mr15 EC_ppim_top ec_pp_f.*?<\/table>)/', $contents_left);
    $ads_right = findAdBlocks('(<div id=\"bdfs[^>]*class=\"EC_im EC_fr EC_PP EC_idea1017 \">.*?<a class=\"EC_BL EC_desc\".*?<\/a>)', $contents_right);

    echo "------------Keywords ads for".$kw."start ------------------------------------<br>";
    fwrite($fp, "----------".$kw . " ads start------------------------- \r\n");

    echo "left ads with green background is<br>";
    writeAdUrls($fp, $ads_left_green, '/<span>.*?<\/span>/');

    echo "<p>-------------<br>";
    echo "left ads with white background is<br>";
    writeAdUrls($fp, $ads_left_white, '/<span class=\"ec_url\">.*?<\/span>/');

    echo "<p>-------------<br>";
    echo "right ads with is<br>";
    writeAdUrls($fp, $ads_right, '/(<font size=\"-1\" class=\"EC_url\">.*?<\/font>)/');

    echo "---------------Keywords ads for".$kw."END ------------------------------------<br>";
    fwrite($fp, "----------".$kw . " ads End------------------------- \r\n");
}

/* Timestamp marking the end of this run. */
fwrite($fp, date("Y-m-d H:i:s") . " PHP代码自动运行!\r\n");
fclose($fp);

/**
 * Returns every full match of $pattern inside $html.
 *
 * @param string $pattern PCRE pattern, including delimiters.
 * @param string $html    Markup to search.
 * @return array List of matched strings; empty when nothing matches or the
 *               pattern fails to run.
 */
function findAdBlocks($pattern, $html)
{
    $matches = array();
    if (!preg_match_all($pattern, $html, $matches)) {
        return array();
    }
    return $matches[0];
}

/**
 * Extracts the URL element from each ad block, logs it and echoes it.
 *
 * Blocks in which $urlPattern does not match are skipped, so no blank lines
 * end up in the log.
 *
 * @param resource $fp         Open, writable file handle for the log.
 * @param array    $adBlocks   HTML fragments, one per ad.
 * @param string   $urlPattern PCRE pattern matching the element that holds
 *                             the ad's display URL.
 * @return void
 */
function writeAdUrls($fp, $adBlocks, $urlPattern)
{
    foreach ($adBlocks as $adBlock) {
        $found = array();
        if (preg_match($urlPattern, $adBlock, $found) !== 1) {
            continue;
        }
        /* The log receives plain text only. */
        fwrite($fp, strip_tags($found[0]) . "\r\n");
        /* NOTE(review): the matched remote markup is echoed unescaped, as
           before. The page encoding is not known here, so escaping was not
           added blindly - confirm and escape if this output is ever served
           to a browser. */
        echo $found[0] . "<br>";
    }
}

/**
 * Downloads a URL with cURL and returns the response body.
 *
 * @param string $Date         URL to request.
 * @param int    $timeout      Seconds allowed for establishing the connection.
 * @param int    $totalTimeout Seconds allowed for the whole transfer, so a
 *                             stalled response cannot block the run forever.
 * @return string|false Response body, or false when the request fails.
 */
function fetch($Date, $timeout = 5, $totalTimeout = 30)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_URL, (string) $Date);
    /* Return the body from curl_exec() instead of printing it. */
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)");
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_TIMEOUT, $totalTimeout);
    $contents = curl_exec($ch);
    curl_close($ch);
    return $contents;
}
?>