新浪科技文章采集代码
程序员文章站
2024-02-21 16:26:28
...
适用于 ThinkPHP 的新浪科技文章一键采集代码
/**
 * Collect articles from the Sina Tech rolling-news list and store new ones
 * in the `post` table.
 *
 * Reads the number of list pages to crawl from $_POST['get_post_page_num']
 * (values below 1, or a missing value, fall back to 1). For each list row it
 * extracts title, link and publish time, downloads the article page, pulls
 * out the body, strips <a> wrappers and inserts the article when no post with
 * the same title exists yet. Finally echoes a JSON status string reporting
 * how many rows were added.
 *
 * NOTE(review): this listing was recovered from a web page whose HTML
 * extraction stripped every "<...>" sequence out of the regex literals and
 * truncated the for-loop header. The loop bound and the patterns marked
 * below are reconstructions, not the author's originals - verify them
 * against the live page markup before relying on this method.
 *
 * Depends on CurlGetPage() and M(), which are defined outside this block.
 *
 * @return void Output is echoed as a JSON string.
 */
public function sina_tech() {
    /* Number of list pages to crawl; always at least 1. */
    $page_num = isset($_POST['get_post_page_num']) ? intval($_POST['get_post_page_num']) : 0;
    if ($page_num < 1) {
        $page_num = 1;
    }

    /* NOTE(review): reconstructed selector for the article body - the
       original pattern's tags were lost. Confirm the container id and the
       end marker against a real article page. */
    $content_pattern = '/<div[^>]*id="artibody"[^>]*>(.*?)<!--\s*publish_helper_end\s*-->/is';

    /* Row count before crawling, used to report how many posts were added. */
    $post_count_a = M('post')->count();

    for ($page = 1; $page <= $page_num; $page++) {
        /* NOTE(review): everything after '#' is a URL fragment, so the
           "page" value is most likely never sent to the server and every
           iteration may fetch the same list. URL kept as in the original;
           confirm the real paging parameter. */
        $fullpage = CurlGetPage('http://roll.tech.sina.com.cn/s/channel.php?ch=05#col=30&spec=&type=&ch=05&k=&offset_page=0&offset_num=0&num=5&asc=&page='.$page);
        if (!is_string($fullpage) || $fullpage === '') {
            continue;
        }

        /* The list page is served in a GB* charset. GBK is a superset of
           GB2312, and //IGNORE drops bytes that cannot be converted instead
           of aborting the whole conversion. */
        $fullpage = iconv('GBK', 'UTF-8//IGNORE', $fullpage);
        if ($fullpage === false || $fullpage === '') {
            continue;
        }

        /* The original narrowed the page to a list container first, but that
           pattern is unrecoverable. Instead every <li> is examined and rows
           lacking a link or a publish time (navigation items) are skipped. */
        if (!preg_match_all('/<li[^>]*>(.*)<\/li>/isU', $fullpage, $in_li_tags)) {
            continue;
        }

        foreach (array_unique($in_li_tags[1]) as $row) {
            /* TITLE: text of the first anchor in the row. */
            if (!preg_match('/<a[^>]*>(.*?)<\/a>/is', $row, $title_match)) {
                continue;
            }
            $title = trim(strip_tags($title_match[1]));
            if ($title === '') {
                continue;
            }

            /* LINK */
            if (!preg_match('/href="([^"]*)"/', $row, $link_match) || $link_match[1] === '') {
                continue;
            }
            $link = $link_match[1];

            /* DATE: the original builds "<year>-" . X . ":00", so X must look
               like "m-d H:i"; match that shape in the row's visible text
               rather than depending on the lost wrapper tag.
               NOTE(review): the year is taken from the current date, which
               is wrong for items crawled across a year boundary. */
            if (!preg_match('/(\d{1,2}-\d{1,2})\s+(\d{1,2}:\d{2})/', strip_tags($row), $date_match)) {
                continue;
            }
            $date = date('Y-') . $date_match[1] . ' ' . $date_match[2] . ':00';

            /* Skip known titles before downloading the article. The array
               condition lets the framework escape the scraped title instead
               of interpolating it into the SQL string. */
            $post_title_count = M('post')->where(array('title' => $title))->count();
            if ($post_title_count != 0) {
                continue;
            }

            /* Fetch the article page. */
            $fullpage_post = CurlGetPage($link);
            if (!is_string($fullpage_post) || $fullpage_post === '') {
                continue;
            }

            /* FIX TAGS
               NOTE(review): the original ran up to three clean-up
               replacements here whose patterns were lost. Removing script
               and style blocks is a conservative stand-in; restore the
               original clean-up once the real markup is known. */
            $cleaned = preg_replace('/<(script|style)[^>]*>.*<\/\1>/isU', '', $fullpage_post);
            if (is_string($cleaned)) {
                $fullpage_post = $cleaned;
            }

            /* POST CONTENT
               NOTE(review): as in the original, the article page is not
               charset-converted; confirm the stored content's encoding. */
            if (!preg_match($content_pattern, $fullpage_post, $content_match)) {
                continue;
            }

            /* Unwrap <a> tags, keeping their inner text. */
            $post_content = preg_replace('/<a[^>]*>(.*)<\/a>/isU', '${1}', trim($content_match[1]));
            if (!is_string($post_content) || $post_content === '') {
                continue;
            }

            /* SAVE TO DB */
            $dataMySql = array(
                'title'    => $title,
                'content'  => $post_content,
                'datetime' => $date,
            );
            M('post')->add($dataMySql);
        }
    }

    /* Row count after crawling. */
    $post_count_b = M('post')->count();
    $post_add_num = $post_count_b - $post_count_a;

    /* Report the result to the caller. */
    if ($post_count_a == $post_count_b) {
        echo '{"success":1,"msg":"文章数无变化"}';
    } else {
        echo '{"success":1,"msg":"成功采集 ' . $post_add_num . ' 篇文章"}';
    }
}
AD:真正免费,域名+虚机+企业邮箱=0元
声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn核实处理。
相关文章
相关视频
专题推荐
-
独孤九贱-php全栈开发教程
全栈 170W+
主讲:Peter-Zhu 轻松幽默、简短易学,非常适合PHP学习入门
-
玉女心经-web前端开发教程
入门 80W+
主讲:灭绝师太 由浅入深、明快简洁,非常适合前端学习入门
-
天龙八部-实战开发教程
实战 120W+
主讲:西门大官人 思路清晰、严谨规范,适合有一定web编程基础学习
- 最新文章
- 热门排行
网友评论
文明上网理性发言,请遵守 新闻评论服务协议
我要评论