PHP特定爬虫程序备份
程序员文章站
2022-06-10 11:20:12
...
用于爬取一个网站上面的黄页信息: <?php error_reporting(E_ALL^E_NOTICE);$conn = @ mysql_connect("localhost", "*******", "************");mysql_select_db("**********", $conn);mysql_query("set names 'utf8'"); date_default_timezone_set("ETC/GMT-8")
用于爬取一个网站上面的黄页信息
(.*)/ms", $fcontents, $li_span); $exp_li_span = explode("
(.*)/mUs", $fcontent02, $url_contron);
$contron_str = preg_replace('/(.*?)/','',$url_contron[1]);
$contron_str = preg_replace('/(.*?)/','',$contron_str);
$contron_str_exp = explode("
",$contron_str); foreach($contron_str_exp as $li_exp_span){ $li_exp_span = DeleteHtml($li_exp_span); $li_exp_sub = mb_substr($li_exp_span,0,2,'utf-8'); if($li_exp_sub == "名称"){ $li_span = strip_tags($li_exp_span); $name = explode(":",$li_span); } if($li_exp_sub == "电话"){ $strip_str_phone = strip_tags($li_exp_span); $phone = explode(":",$strip_str_phone); } if($li_exp_sub == "手机"){ $strip_str_tel = strip_tags($li_exp_span); $tel = explode(":",$strip_str_tel); } if($li_exp_sub == "地址"){ $strip_str_addres = strip_tags($li_exp_span); $addres = explode(":",$strip_str_addres); } if($li_exp_sub == "标签"){ $strip_str_tag = strip_tags($li_exp_span); $tag = explode(":",$strip_str_tag); } } $insert_rs = mysql_query("INSERT INTO `ruzhouren`.`rzrmh_tel` (`id`, `name`, `status`, `order`, `url`, `vip`, `tel`, `tel2`, `addr`, `add_uid`, `own_uid`, `add_dateline`, `type`, `http`) VALUES (NULL, '$name[1]', '1', '1', '0', '1', '$phone[1]', '$tel[1]', '$addres[1]', '1', '1', '$time_dateline', '0', '$http');"); if($insert_rs){ echo "insert-----$name[1] Succeed!"."\n"; } //print_r($contron_str_exp); } sleep(5); //print_r($pre_li); /* v1.0 foreach($exp_li_span as $span){ $ex_url = ""; $tow_span = explode("$ex_url",$span); $string = preg_replace('//','',$tow_span[0]); $string = preg_replace('//','',$string); $string_ok = preg_replace('/target="_blank" >/','',$string); $span_2 = explode(".html",$tow_span[1]); if(count($span_2) >2 ){ $span_exp = $span_2[2]; }else{ $span_exp = $span_2[1]; } $string2 = preg_replace('//','',$span_exp); $string2 = preg_replace('//','',$string2); $string2_ok = preg_replace('/">/','',$string2 ); print $string_ok."------".$string2_ok; }*/ sleep(10); } ?>
",$contron_str); foreach($contron_str_exp as $li_exp_span){ $li_exp_span = DeleteHtml($li_exp_span); $li_exp_sub = mb_substr($li_exp_span,0,2,'utf-8'); if($li_exp_sub == "名称"){ $li_span = strip_tags($li_exp_span); $name = explode(":",$li_span); } if($li_exp_sub == "电话"){ $strip_str_phone = strip_tags($li_exp_span); $phone = explode(":",$strip_str_phone); } if($li_exp_sub == "手机"){ $strip_str_tel = strip_tags($li_exp_span); $tel = explode(":",$strip_str_tel); } if($li_exp_sub == "地址"){ $strip_str_addres = strip_tags($li_exp_span); $addres = explode(":",$strip_str_addres); } if($li_exp_sub == "标签"){ $strip_str_tag = strip_tags($li_exp_span); $tag = explode(":",$strip_str_tag); } } $insert_rs = mysql_query("INSERT INTO `ruzhouren`.`rzrmh_tel` (`id`, `name`, `status`, `order`, `url`, `vip`, `tel`, `tel2`, `addr`, `add_uid`, `own_uid`, `add_dateline`, `type`, `http`) VALUES (NULL, '$name[1]', '1', '1', '0', '1', '$phone[1]', '$tel[1]', '$addres[1]', '1', '1', '$time_dateline', '0', '$http');"); if($insert_rs){ echo "insert-----$name[1] Succeed!"."\n"; } //print_r($contron_str_exp); } sleep(5); //print_r($pre_li); /* v1.0 foreach($exp_li_span as $span){ $ex_url = ""; $tow_span = explode("$ex_url",$span); $string = preg_replace('//','',$tow_span[0]); $string = preg_replace('//','',$string); $string_ok = preg_replace('/target="_blank" >/','',$string); $span_2 = explode(".html",$tow_span[1]); if(count($span_2) >2 ){ $span_exp = $span_2[2]; }else{ $span_exp = $span_2[1]; } $string2 = preg_replace('//','',$span_exp); $string2 = preg_replace('//','',$string2); $string2_ok = preg_replace('/">/','',$string2 ); print $string_ok."------".$string2_ok; }*/ sleep(10); } ?>
声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn核实处理。
相关文章
相关视频