curl爬教务系统的数据解决思路
程序员文章站
2022-06-14 16:38:12
...
curl爬教务系统的数据
你好各位又来请教大家了,我现在又遇到这样的情况
代码是这样子的
header ( "content-Type: text/html; charset=utf-8" );
require_once 'search.php';
// 第一步:提交数据,生成cookie,将cookie保存在临时目录下
$cookiejar = realpath ( 'cookie.txt' );
$id=$_GET['id'];
$password=$_GET['password'];
$year=$_GET['year'];
$term=$_GET['term'];
$ch = curl_init ();
$login_url = "http://211.67.32.51/default3.aspx";
$curlPost = "__VIEWSTATE=dDw5NTI3MzM0NTQ7dDw7bDxpPDE%2BO2k8NT47PjtsPHQ8O2w8aTw4PjtpPDExPjs%2BO2w8dDxwPDtwPGw8b25jbGljazs%2BO2w8d2luZG93LmNsb3NlKClcOzs%2BPj47Oz47dDxwPGw8VmlzaWJsZTs%2BO2w8bzxmPjs%2BPjs7Pjs%2BPjt0PHA8bDxWaXNpYmxlOz47bDxvPGY%2BOz4%2BOzs%2BOz4%2BO2w8aW1nREw7aW1nVEM7aW1nUU1NOz4%2BiyfPvg3FujyU8xX773LO%2FCbCuTw%3D&tbYHM=K061141026&tbPSW=100311&ddlSF=学生&imgDL.x=40&imgDL.y=7";
$curlPost = iconv("UTF-8", "GBK", $curlPost);
curl_setopt ( $ch, CURLOPT_URL, $login_url );
// 启用时会将头文件的信息作为数据流输出
curl_setopt ( $ch, CURLOPT_PROXY, 'jackdowosn.gnway.net:81');
curl_setopt ( $ch, CURLOPT_HEADER, 0 );
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/4");
curl_setopt($ch,CURLOPT_FOLLOWLOCATION,true);
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt ( $ch, CURLOPT_REFERER, 'http://211.67.32.51/' );
curl_setopt ( $ch, CURLOPT_POST, 1 );
curl_setopt ( $ch, CURLOPT_POSTFIELDS, $curlPost );
// 设置连接结束后保存cookie信息的文件
curl_setopt ( $ch, CURLOPT_COOKIEJAR, $cookiejar );
$data=curl_exec ( $ch );
//$data = mb_convert_encoding ( $data, "utf-8", "GBK" );
//echo ''.$data.' ';
$curlPost = "xh=K061141026";
$curlPost = iconv("UTF-8", "GBK", $curlPost);
curl_setopt ( $ch, CURLOPT_URL, "http://211.67.32.51/xscj.aspx?xh=K061141026" );
// 启用时会将头文件的信息作为数据流输出
curl_setopt ( $ch, CURLOPT_PROXY, 'jackdowosn.gnway.net:81');
curl_setopt ( $ch, CURLOPT_HEADER, 0 );
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/4");
curl_setopt($ch,CURLOPT_FOLLOWLOCATION,true);
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt ( $ch, CURLOPT_REFERER, 'http://211.67.32.51/' );
curl_setopt ( $ch, CURLOPT_POST, 0 );
curl_setopt ( $ch, CURLOPT_POSTFIELDS, $curlPost );
// 设置连接结束后保存cookie信息的文件
curl_setopt ( $ch, CURLOPT_COOKIEFILE, $cookiejar );
$data=curl_exec ( $ch );
$data = mb_convert_encoding ( $data, "utf-8", "GBK" );
preg_match_all ( '/\/i', $data, $matches );
//上面的模式修正符不能加s
//file_put_contents("d://value.txt",$matches[1][0]);
//echo var_dump($matches[1][0])."
";
//echo $matches[1][0];
//echo ''.$data.' ';
echo search3($id,$year, $term,$ch,$matches[1][0]);
?>
function search3($id,$year, $term, $ch,$value){
//$data=file_get_contents("d://value.txt");
curl_setopt ( $ch, CURLOPT_PROXY, 'jackdowosn.gnway.net:81');
$curlPost = "xh=K061141026&__VIEWSTATE=$value&Button2=按学年学期查询&ddlKCLX=必修&xn=2012-2013&xq=1";
$curlPost = iconv("UTF-8", "GBK", $curlPost);
curl_setopt ( $ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/4");
curl_setopt ( $ch, CURLOPT_FOLLOWLOCATION,true);
curl_setopt ( $ch, CURLOPT_URL, "http://211.67.32.51/xscj.aspx" );
curl_setopt ( $ch, CURLOPT_HEADER, 0);
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt ( $ch, CURLOPT_POST, 1 );
curl_setopt ( $ch, CURLOPT_REFERER, "http://211.67.32.51/xscj.aspx?xh=K061141026" );
curl_setopt ( $ch, CURLOPT_POSTFIELDS, $curlPost );
curl_setopt ( $ch, CURLOPT_COOKIEFILE, $cookiejar ); // 要回传cookie
$data = curl_exec ( $ch );
curl_close ( $ch );
$data = mb_convert_encoding ( $data, "utf-8", "GBK" );
/*preg_match_all ( '/\\s*\(.*?)\\s*\
(.*?)\/is', $data, $matches );
foreach ( $matches [1] as $key => $val )
$nav =$nav ."\n". $val . "---" . $matches [2] [$key];*/
return $data;
}
以上程序执行到search3时出现了,其他的都可正常返回数据。我问过一个前辈,他的回答是“我也不清楚喔。我有印象我们做正方系统遇到这种问题有可能是参数传错了,有可能是编码错了,也有可能没设置Referer这个参数”。请大家帮我看下问题出在哪里,有兴趣的可以帮我调试下,代理服务器都是真实可用的。下面是几个post参数和头信息
你好各位又来请教大家了,我现在又遇到这样的情况
代码是这样子的
header ( "content-Type: text/html; charset=utf-8" );
require_once 'search.php';
// 第一步:提交数据,生成cookie,将cookie保存在临时目录下
$cookiejar = realpath ( 'cookie.txt' );
$id=$_GET['id'];
$password=$_GET['password'];
$year=$_GET['year'];
$term=$_GET['term'];
$ch = curl_init ();
$login_url = "http://211.67.32.51/default3.aspx";
$curlPost = "__VIEWSTATE=dDw5NTI3MzM0NTQ7dDw7bDxpPDE%2BO2k8NT47PjtsPHQ8O2w8aTw4PjtpPDExPjs%2BO2w8dDxwPDtwPGw8b25jbGljazs%2BO2w8d2luZG93LmNsb3NlKClcOzs%2BPj47Oz47dDxwPGw8VmlzaWJsZTs%2BO2w8bzxmPjs%2BPjs7Pjs%2BPjt0PHA8bDxWaXNpYmxlOz47bDxvPGY%2BOz4%2BOzs%2BOz4%2BO2w8aW1nREw7aW1nVEM7aW1nUU1NOz4%2BiyfPvg3FujyU8xX773LO%2FCbCuTw%3D&tbYHM=K061141026&tbPSW=100311&ddlSF=学生&imgDL.x=40&imgDL.y=7";
$curlPost = iconv("UTF-8", "GBK", $curlPost);
curl_setopt ( $ch, CURLOPT_URL, $login_url );
// 启用时会将头文件的信息作为数据流输出
curl_setopt ( $ch, CURLOPT_PROXY, 'jackdowosn.gnway.net:81');
curl_setopt ( $ch, CURLOPT_HEADER, 0 );
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/4");
curl_setopt($ch,CURLOPT_FOLLOWLOCATION,true);
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt ( $ch, CURLOPT_REFERER, 'http://211.67.32.51/' );
curl_setopt ( $ch, CURLOPT_POST, 1 );
curl_setopt ( $ch, CURLOPT_POSTFIELDS, $curlPost );
// 设置连接结束后保存cookie信息的文件
curl_setopt ( $ch, CURLOPT_COOKIEJAR, $cookiejar );
$data=curl_exec ( $ch );
//$data = mb_convert_encoding ( $data, "utf-8", "GBK" );
//echo '
$curlPost = "xh=K061141026";
$curlPost = iconv("UTF-8", "GBK", $curlPost);
curl_setopt ( $ch, CURLOPT_URL, "http://211.67.32.51/xscj.aspx?xh=K061141026" );
// 启用时会将头文件的信息作为数据流输出
curl_setopt ( $ch, CURLOPT_PROXY, 'jackdowosn.gnway.net:81');
curl_setopt ( $ch, CURLOPT_HEADER, 0 );
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/4");
curl_setopt($ch,CURLOPT_FOLLOWLOCATION,true);
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt ( $ch, CURLOPT_REFERER, 'http://211.67.32.51/' );
curl_setopt ( $ch, CURLOPT_POST, 0 );
curl_setopt ( $ch, CURLOPT_POSTFIELDS, $curlPost );
// 设置连接结束后保存cookie信息的文件
curl_setopt ( $ch, CURLOPT_COOKIEFILE, $cookiejar );
$data=curl_exec ( $ch );
$data = mb_convert_encoding ( $data, "utf-8", "GBK" );
preg_match_all ( '/\/i', $data, $matches );
//上面的模式修正符不能加s
//file_put_contents("d://value.txt",$matches[1][0]);
//echo var_dump($matches[1][0])."
";
//echo $matches[1][0];
//echo '
echo search3($id,$year, $term,$ch,$matches[1][0]);
?>
function search3($id,$year, $term, $ch,$value){
//$data=file_get_contents("d://value.txt");
curl_setopt ( $ch, CURLOPT_PROXY, 'jackdowosn.gnway.net:81');
$curlPost = "xh=K061141026&__VIEWSTATE=$value&Button2=按学年学期查询&ddlKCLX=必修&xn=2012-2013&xq=1";
$curlPost = iconv("UTF-8", "GBK", $curlPost);
curl_setopt ( $ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/4");
curl_setopt ( $ch, CURLOPT_FOLLOWLOCATION,true);
curl_setopt ( $ch, CURLOPT_URL, "http://211.67.32.51/xscj.aspx" );
curl_setopt ( $ch, CURLOPT_HEADER, 0);
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt ( $ch, CURLOPT_POST, 1 );
curl_setopt ( $ch, CURLOPT_REFERER, "http://211.67.32.51/xscj.aspx?xh=K061141026" );
curl_setopt ( $ch, CURLOPT_POSTFIELDS, $curlPost );
curl_setopt ( $ch, CURLOPT_COOKIEFILE, $cookiejar ); // 要回传cookie
$data = curl_exec ( $ch );
curl_close ( $ch );
$data = mb_convert_encoding ( $data, "utf-8", "GBK" );
/*preg_match_all ( '/\
foreach ( $matches [1] as $key => $val )
$nav =$nav ."\n". $val . "---" . $matches [2] [$key];*/
return $data;
}
以上程序执行到search3时出现了,其他的都可正常返回数据。我问过一个前辈,他的回答是“我也不清楚喔。我有印象我们做正方系统遇到这种问题有可能是参数传错了,有可能是编码错了,也有可能没设置Referer这个参数”。请大家帮我看下问题出在哪里,有兴趣的可以帮我调试下,代理服务器都是真实可用的。下面是几个post参数和头信息
相关文章
相关视频
- 教你使用PHP数据库迁移工具“Phinx”
- 详解win10下PHP的安装配置(以php5.6为...
- php Swoole实现毫秒定时计划任务(详解)
- CodeIgniter基本配置详细介绍_PHP
- curl爬教务系统的数据解决思路
专题推荐
-
独孤九贱-php全栈开发教程
全栈 170W+
主讲:Peter-Zhu 轻松幽默、简短易学,非常适合PHP学习入门
-
玉女心经-web前端开发教程
入门 80W+
主讲:灭绝师太 由浅入深、明快简洁,非常适合前端学习入门
-
天龙八部-实战开发教程
实战 120W+
主讲:西门大官人 思路清晰、严谨规范,适合有一定web编程基础学习
- 最新文章
- 热门排行
网友评论
文明上网理性发言,请遵守 新闻评论服务协议
我要评论