欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页  >  php教程

php中实现的一个curl批处理的实例 - 猿客

程序员文章站 2022-06-11 17:02:09
...
curl是利用URL语法在命令行方式下工作的开源文件传输工具;PHP通过cURL扩展提供了相应的函数接口。

本文在PHP中实现了一个curl批处理(curl_multi)的实例。

代码如下:

  header("Content-Type:text/html;charset=utf8");

  /**
   * Drive a cURL multi handle until every attached transfer has finished.
   *
   * @param resource|CurlMultiHandle $mh Multi handle with easy handles attached.
   * @return int The last CURLM_* status code returned by curl_multi_exec().
   */
  function batch_run($mh)
  {
      $active = 0;
      do {
          $status = curl_multi_exec($mh, $active);
          // Block until there is socket activity instead of spinning on
          // curl_multi_exec(). curl_multi_select() may return -1; in that
          // case pause briefly so the loop does not burn a full CPU core.
          if ($status === CURLM_OK && $active && curl_multi_select($mh, 1.0) === -1) {
              usleep(10000);
          }
      } while ($status === CURLM_CALL_MULTI_PERFORM || ($status === CURLM_OK && $active));

      return $status;
  }

  /**
   * Download several URLs in parallel with one cURL multi handle.
   *
   * @param array $urls    URLs to download; array keys are preserved in the result.
   * @param int   $timeout Upper bound in seconds for each individual transfer.
   * @return array Response bodies keyed like $urls ('' when a transfer produced no body).
   */
  function batch_fetch(array $urls, $timeout = 15)
  {
      $bodies = array();
      if (!$urls) {
          // Nothing to do; avoid creating a multi handle for an empty batch.
          return $bodies;
      }

      $mh = curl_multi_init();
      $handles = array();
      foreach ($urls as $key => $url) {
          $ch = curl_init();
          curl_setopt_array($ch, array(
              CURLOPT_URL            => $url,
              CURLOPT_HEADER         => 0,
              CURLOPT_RETURNTRANSFER => 1,
              // A server may answer with a redirect; follow it so the body
              // that gets parsed is the final page.
              CURLOPT_FOLLOWLOCATION => 1,
              // Without timeouts one stalled host keeps the whole batch waiting.
              CURLOPT_CONNECTTIMEOUT => 5,
              CURLOPT_TIMEOUT        => $timeout,
          ));
          curl_multi_add_handle($mh, $ch);
          $handles[$key] = $ch;
      }

      batch_run($mh);

      foreach ($handles as $key => $ch) {
          $bodies[$key] = (string) curl_multi_getcontent($ch);
          curl_multi_remove_handle($mh, $ch);
      }
      curl_multi_close($mh);

      // The easy handles are only referenced by $handles, which goes out of
      // scope when this function returns.
      return $bodies;
  }

  /**
   * Collect the absolute http(s) link targets of the <a> tags in an HTML string.
   *
   * @param string $html  HTML source to scan.
   * @param int    $limit Maximum number of links to return.
   * @return array Unique URLs in order of first appearance.
   */
  function extract_links($html, $limit = 100)
  {
      $found = array();
      preg_match_all('/<a\b[^>]*?\bhref\s*=\s*["\']([^"\'#]+)["\']/i', $html, $found);

      $links = array();
      foreach ($found[1] as $href) {
          $href = trim($href);
          if (strncmp($href, '//', 2) === 0) {
              // Protocol-relative link: give it a scheme so cURL can fetch it.
              $href = 'http:' . $href;
          }
          // Relative paths, javascript: and mailto: targets cannot be fetched
          // without further resolution, so only absolute http(s) URLs are kept.
          if (preg_match('#^https?://#i', $href)) {
              $links[$href] = true; // array key doubles as a de-duplication set
          }
      }

      return array_slice(array_keys($links), 0, $limit);
  }

  /**
   * Return the text of the first <title> element of a page, converted to UTF-8.
   *
   * @param string $html Raw page source in its original encoding.
   * @return string|null Title text, or null when the page has no non-empty <title>.
   */
  function extract_title($html)
  {
      $m = array();
      if (!preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $html, $m)) {
          return null;
      }
      $title = trim($m[1]);
      if ($title === '') {
          return null;
      }

      // UTF-8 is tried first, in strict mode, so text that is already UTF-8 is
      // left untouched; anything detected as another encoding is converted.
      $encoding = mb_detect_encoding($title, array('UTF-8', 'CP936', 'BIG5'), true);
      if ($encoding !== false && $encoding !== 'UTF-8') {
          $title = mb_convert_encoding($title, 'UTF-8', $encoding);
      }

      return $title;
  }

  /* Step 1: fetch the two seed pages and collect the targets of their <a> tags. */
  $seedPages = batch_fetch(array(
      'http://www.sina.com.cn',
      'http://www.baidu.com/',
  ));
  // Process at most 100 of the links that were found.
  $links = extract_links(implode('', $seedPages), 100);

  /* Step 2: fetch the linked pages and collect their <title> text. */
  $titles = array();
  foreach (batch_fetch($links) as $pageHtml) {
      $title = extract_title($pageHtml);
      if ($title !== null) {
          $titles[] = $title;
      }
  }

  // Print the collected titles.
  var_dump($titles);