他们在爬网页时,是怎么用的代理IP?
程序员文章站
2022-04-24 14:52:29
...
比如用CURL的话,代理IP该怎么用啊,是开启软件还是直接在CURL做的设置啊,求指教。
回复内容:
比如用CURL的话,代理IP该怎么用啊,是开启软件还是直接在CURL做的设置啊,求指教。
http://php.net/manual/en/book.curl.php
php
// Route the request through a proxy only when one has been configured.
if ($this->proxy) {
    curl_setopt($process, CURLOPT_PROXY, $this->proxy);
}
/**
 * Fetch a URL with cURL and return the response body.
 *
 * A GET request is sent by default. When $post is non-empty the request is
 * sent as a POST with $post as its body. Spoofed client-IP headers, a fixed
 * referer and a desktop Safari user agent are attached to every request.
 *
 * @param string       $url        Target URL.
 * @param string|array $post       POST body (passed straight to CURLOPT_POSTFIELDS);
 *                                 '' (the default) means "no body, send a GET".
 * @param int|bool     $autoFollow Truthy to follow HTTP redirects.
 * @param string       $proxy      Optional proxy passed to CURLOPT_PROXY
 *                                 (e.g. "1.2.3.4:8080"); '' means a direct connection.
 *
 * @return string|false The response body, or false when the handle could not
 *                      be created or the transfer failed.
 */
function curlPost($url, $post = '', $autoFollow = 0, $proxy = '')
{
    // 1. Create the handle.
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    // The original literal was cut off mid-string; the tail is completed with
    // the WebKit build number that matches the AppleWebKit token.
    $user_agent = 'Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11';
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);

    // 2. Set the options, including the URL.
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_HEADER, 0);         // do not include response headers in the output

    // Forge the client IP. These headers only *claim* an address; they do not
    // route traffic anywhere. Use $proxy to actually go through a proxy.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('X-FORWARDED-FOR:61.135.169.125', 'CLIENT-IP:61.135.169.125'));
    curl_setopt($ch, CURLOPT_REFERER, "http://www.baidu.com/"); // forge the referer

    // Send the request through the proxy when one was supplied.
    if ($proxy !== '' && $proxy !== null) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
    }

    if ($autoFollow) {
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // follow redirects
        curl_setopt($ch, CURLOPT_AUTOREFERER, true);    // set Referer automatically on each hop
    }

    // No CURLOPT_CUSTOMREQUEST here: forcing the verb to 'GET' would override
    // CURLOPT_POST below, so POST bodies would be sent under a GET method.
    // A fresh handle already defaults to GET.
    // Loose comparison is deliberate so that null is also treated as "no body".
    if ($post != '') {
        curl_setopt($ch, CURLOPT_POST, 1); // submit as POST
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
    }

    // 3. Execute and collect the HTML document.
    $output = curl_exec($ch);
    curl_close($ch);

    return $output;
}
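CURLOPT_HTTPHEADER 是写爬虫必备的选项,可以用来伪造 X-Forwarded-For、Client-IP 等请求头;但它只是改了请求头,并不会让请求真正经过代理。要真正使用代理 IP,需要直接在 CURL 里用 CURLOPT_PROXY 设置代理地址。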