PHP 文章中的远程图片采集到本地的代码
程序员文章站
2022-05-25 23:19:16
第一步. 先从文章中把所有 <img ...> 用正则抠出来. 复制代码 代码如下: $message //文章内容 //正则(这个还不是) $reg = "/...
第一步. 先从文章中把所有<img ...> 用正则 抠出来.
// $message holds the article content (HTML). Default to an empty string so
// the snippet is valid PHP even when the caller has not assigned it yet.
$message = isset($message) ? $message : '';

// Match every <img> tag whose src is an absolute http(s) URL ending in a known
// image extension. Capture group 1 is the full URL, 2 the host, 3 the path
// without extension, 4 the extension.
// Negated character classes are used instead of ".+" so that a match can
// never run past the closing quote of the src attribute and swallow
// neighbouring <img> tags.
$reg = '/<img[^>]*src="(https?:\/\/([^"\/]+)\/([^"]+)\.(jpe?g|gif|bmp|png))"/i';

// Collect the matched image URLs into $img_array (grouped per capture group).
preg_match_all($reg, $message, $img_array, PREG_PATTERN_ORDER);

// Keep each image URL only once.
$img_array = array_unique($img_array[1]);
第二步. 把$img_array 数组循环一下. 做图片保存和文章位置替换
// Walk over every distinct image URL found in the article.
foreach ($img_array as $img) {
    // Images already hosted on our own domain are left untouched.
    if (get_domain($img) == 'xxx.com') {
        continue;
    }

    // Fetch the remote image into the temporary directory.
    $gimg = new getimage();
    $gimg->source = $img;
    $gimg->save_to = './data/temp/';
    $file = $gimg->download();

    // Hand the downloaded file to pic_save(); its return value is used as the
    // image's new location (pic_save() is defined elsewhere - presumably it
    // stores the file in the album and returns the local path).
    $img_path = pic_save($file, 0, '');

    // Point the article at the new location instead of the remote URL.
    $message = str_replace($img, $img_path, $message);
}
....这时候 $message 里面的图片已经替换为自己服务器本地的地址,并且图片也保存到自己的服务器上.
//下面一个函数 和 类是从网络上找的.
//从url中获得域名
/**
 * Extract the site domain (e.g. "example.com", "sina.com.cn") from a URL.
 *
 * Only the host part of the URL is inspected, so path segments or partial
 * label matches (such as "img.cn" inside "img.cnblogs.com") cannot be
 * mistaken for the domain.
 *
 * @param string $url Absolute URL; scheme-less input such as
 *                    "www.example.com/a.jpg" is accepted as well.
 * @return string Lower-cased domain, the address itself for IP hosts, the
 *                host itself for hosts with fewer than three labels, or ''
 *                when no host can be determined.
 */
function get_domain($url)
{
    $url = (string) $url;

    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === '') {
        // Without a scheme parse_url() reports everything as path, so retry
        // with a dummy scheme in front.
        $host = parse_url('http://' . ltrim($url, '/'), PHP_URL_HOST);
    }
    if (!is_string($host) || $host === '') {
        return '';
    }
    // Host names are case-insensitive; normalise so callers can compare.
    $host = strtolower($host);

    // Well-known suffixes, optionally followed by .cn/.hk. Anchored to the end
    // of the host so only the real suffix can match.
    $pattern = '/[\w-]+\.(com|net|org|gov|cc|biz|info|cn)(\.(cn|hk))*$/';
    if (preg_match($pattern, $host, $matches) === 1) {
        return $matches[0];
    }

    // A literal IP address is its own "domain".
    if (filter_var($host, FILTER_VALIDATE_IP) !== false) {
        return $host;
    }

    $labels = explode('.', $host);
    $count = count($labels);
    if ($count < 3) {
        // "localhost" or "example.de": nothing to strip.
        return $host;
    }

    // Second-level labels that belong to the suffix (com.xx, net.xx, ...),
    // in which case three labels form the domain instead of two.
    $second_level = array('com', 'net', 'org', '3322');
    if (in_array($labels[$count - 2], $second_level, true)) {
        return $labels[$count - 3] . '.' . $labels[$count - 2] . '.' . $labels[$count - 1];
    }

    return $labels[$count - 2] . '.' . $labels[$count - 1];
}
// 从远程把图片下载到服务器本地的类
/**
 * Downloads an image into a local directory and describes the result with an
 * array shaped like one entry of $_FILES, so it can be passed to the same
 * code that handles regular uploads.
 *
 * Usage: assign $source and $save_to, then call download().
 */
class getimage
{
    /** @var string URL of the image to fetch. */
    public $source;

    /** @var string Directory the temporary file is written to. */
    public $save_to;

    /**
     * @var int|null JPEG quality (0-100) or PNG compression level (0-9).
     *               Only used by the 'gd' method; defaults to 100 / 0.
     */
    public $quality;

    /**
     * Fetch $source into $save_to under a generated temporary name.
     *
     * @param string $method 'curl' copies the bytes as-is; 'gd' decodes the
     *                       image and re-encodes it with the GD functions.
     * @return array Keys: 'name' (basename of the source), 'type' (detected
     *               MIME type, '' if unknown), 'size' (bytes written, 0 on
     *               failure), 'tmp_name' (local path), 'error'
     *               (UPLOAD_ERR_OK on success, UPLOAD_ERR_NO_FILE otherwise).
     */
    public function download($method = 'curl')
    {
        // getimagesize() raises a warning and returns false for unreadable or
        // non-image sources; that case is handled explicitly right below.
        $info = @getimagesize($this->source);
        $mime = (is_array($info) && isset($info['mime'])) ? $info['mime'] : '';

        // Sub-type of the MIME string, e.g. "jpeg" for "image/jpeg".
        $type = (string) substr((string) strrchr($mime, '/'), 1);

        // type => [create function, save function, file extension,
        //          default quality (null = the save call takes no quality)]
        $formats = array(
            'jpeg'         => array('imagecreatefromjpeg', 'imagejpeg', 'jpg', 100),
            'png'          => array('imagecreatefrompng',  'imagepng',  'png', 0),
            'bmp'          => array('imagecreatefrombmp',  'imagebmp',  'bmp', null),
            'gif'          => array('imagecreatefromgif',  'imagegif',  'gif', null),
            'vnd.wap.wbmp' => array('imagecreatefromwbmp', 'imagewbmp', 'bmp', null),
            'xbm'          => array('imagecreatefromxbm',  'imagexbm',  'xbm', null),
        );
        // Anything unrecognised is treated as JPEG without an explicit quality.
        $format = isset($formats[$type])
            ? $formats[$type]
            : array('imagecreatefromjpeg', 'imagejpeg', 'jpg', null);
        list($image_create_func, $image_save_func, $new_image_ext, $default_quality) = $format;

        $quality = null;
        if ($default_quality !== null) {
            $quality = isset($this->quality) ? $this->quality : $default_quality;
        }

        // Strip trailing separators so "dir/" does not produce "dir//file",
        // and use a wide random suffix so names created within the same
        // second do not collide.
        $dir = rtrim((string) $this->save_to, '/\\');
        $save_to = $dir . '/blog_insert_temp_' . time() . '_' . mt_rand() . '.' . $new_image_ext;

        $ok = false;
        if ($method === 'curl') {
            $ok = (bool) $this->loadimagecurl($save_to);
        } elseif ($method === 'gd') {
            $img = $image_create_func($this->source);
            if ($img !== false) {
                $ok = ($quality !== null)
                    ? (bool) $image_save_func($img, $save_to, $quality)
                    : (bool) $image_save_func($img, $save_to);
            }
        }

        clearstatcache(true, $save_to);
        $ok = $ok && is_file($save_to);

        // Same layout as a $_FILES entry.
        return array(
            'name'     => basename($this->source),
            'type'     => $mime,
            'size'     => $ok ? (int) filesize($save_to) : 0,
            'tmp_name' => $save_to,
            'error'    => $ok ? UPLOAD_ERR_OK : UPLOAD_ERR_NO_FILE,
        );
    }

    /**
     * Copy $source to a local file using cURL.
     *
     * @param string $save_to Destination path; created or truncated.
     * @return bool True when the transfer succeeded; on failure the partial
     *              file is removed and false is returned.
     */
    public function loadimagecurl($save_to)
    {
        $fp = fopen($save_to, 'wb');
        if ($fp === false) {
            return false;
        }

        $ch = curl_init($this->source);
        if ($ch === false) {
            fclose($fp);
            return false;
        }

        // NOTE(review): $source usually comes from article HTML, i.e. it is
        // untrusted. Consider restricting CURLOPT_PROTOCOLS to HTTP/HTTPS.
        $options = array(
            CURLOPT_FILE           => $fp,
            CURLOPT_HEADER         => 0,
            CURLOPT_FOLLOWLOCATION => 1,
            CURLOPT_TIMEOUT        => 60,   // 1 minute timeout (should be enough)
            CURLOPT_FAILONERROR    => true, // HTTP >= 400 is a failure, not an "image"
        );
        curl_setopt_array($ch, $options);

        $ok = curl_exec($ch) !== false;
        curl_close($ch);
        fclose($fp);

        if (!$ok && is_file($save_to)) {
            unlink($save_to);
        }

        return $ok;
    }
}
复制代码 代码如下:
// $message holds the article content (HTML). Default to an empty string so
// the snippet is valid PHP even when the caller has not assigned it yet.
$message = isset($message) ? $message : '';

// Match every <img> tag whose src is an absolute http(s) URL ending in a known
// image extension. Capture group 1 is the full URL, 2 the host, 3 the path
// without extension, 4 the extension.
// Negated character classes are used instead of ".+" so that a match can
// never run past the closing quote of the src attribute and swallow
// neighbouring <img> tags.
$reg = '/<img[^>]*src="(https?:\/\/([^"\/]+)\/([^"]+)\.(jpe?g|gif|bmp|png))"/i';

// Collect the matched image URLs into $img_array (grouped per capture group).
preg_match_all($reg, $message, $img_array, PREG_PATTERN_ORDER);

// Keep each image URL only once.
$img_array = array_unique($img_array[1]);
第二步. 把$img_array 数组循环一下. 做图片保存和文章位置替换
复制代码 代码如下:
// Walk over every distinct image URL found in the article.
foreach ($img_array as $img) {
    // Images already hosted on our own domain are left untouched.
    if (get_domain($img) == 'xxx.com') {
        continue;
    }

    // Fetch the remote image into the temporary directory.
    $gimg = new getimage();
    $gimg->source = $img;
    $gimg->save_to = './data/temp/';
    $file = $gimg->download();

    // Hand the downloaded file to pic_save(); its return value is used as the
    // image's new location (pic_save() is defined elsewhere - presumably it
    // stores the file in the album and returns the local path).
    $img_path = pic_save($file, 0, '');

    // Point the article at the new location instead of the remote URL.
    $message = str_replace($img, $img_path, $message);
}
....这时候 $message 里面的图片已经替换为自己服务器本地的地址,并且图片也保存到自己的服务器上.
复制代码 代码如下:
//下面一个函数 和 类是从网络上找的.
//从url中获得域名
/**
 * Extract the site domain (e.g. "example.com", "sina.com.cn") from a URL.
 *
 * Only the host part of the URL is inspected, so path segments or partial
 * label matches (such as "img.cn" inside "img.cnblogs.com") cannot be
 * mistaken for the domain.
 *
 * @param string $url Absolute URL; scheme-less input such as
 *                    "www.example.com/a.jpg" is accepted as well.
 * @return string Lower-cased domain, the address itself for IP hosts, the
 *                host itself for hosts with fewer than three labels, or ''
 *                when no host can be determined.
 */
function get_domain($url)
{
    $url = (string) $url;

    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === '') {
        // Without a scheme parse_url() reports everything as path, so retry
        // with a dummy scheme in front.
        $host = parse_url('http://' . ltrim($url, '/'), PHP_URL_HOST);
    }
    if (!is_string($host) || $host === '') {
        return '';
    }
    // Host names are case-insensitive; normalise so callers can compare.
    $host = strtolower($host);

    // Well-known suffixes, optionally followed by .cn/.hk. Anchored to the end
    // of the host so only the real suffix can match.
    $pattern = '/[\w-]+\.(com|net|org|gov|cc|biz|info|cn)(\.(cn|hk))*$/';
    if (preg_match($pattern, $host, $matches) === 1) {
        return $matches[0];
    }

    // A literal IP address is its own "domain".
    if (filter_var($host, FILTER_VALIDATE_IP) !== false) {
        return $host;
    }

    $labels = explode('.', $host);
    $count = count($labels);
    if ($count < 3) {
        // "localhost" or "example.de": nothing to strip.
        return $host;
    }

    // Second-level labels that belong to the suffix (com.xx, net.xx, ...),
    // in which case three labels form the domain instead of two.
    $second_level = array('com', 'net', 'org', '3322');
    if (in_array($labels[$count - 2], $second_level, true)) {
        return $labels[$count - 3] . '.' . $labels[$count - 2] . '.' . $labels[$count - 1];
    }

    return $labels[$count - 2] . '.' . $labels[$count - 1];
}
// 从远程把图片下载到服务器本地的类
/**
 * Downloads an image into a local directory and describes the result with an
 * array shaped like one entry of $_FILES, so it can be passed to the same
 * code that handles regular uploads.
 *
 * Usage: assign $source and $save_to, then call download().
 */
class getimage
{
    /** @var string URL of the image to fetch. */
    public $source;

    /** @var string Directory the temporary file is written to. */
    public $save_to;

    /**
     * @var int|null JPEG quality (0-100) or PNG compression level (0-9).
     *               Only used by the 'gd' method; defaults to 100 / 0.
     */
    public $quality;

    /**
     * Fetch $source into $save_to under a generated temporary name.
     *
     * @param string $method 'curl' copies the bytes as-is; 'gd' decodes the
     *                       image and re-encodes it with the GD functions.
     * @return array Keys: 'name' (basename of the source), 'type' (detected
     *               MIME type, '' if unknown), 'size' (bytes written, 0 on
     *               failure), 'tmp_name' (local path), 'error'
     *               (UPLOAD_ERR_OK on success, UPLOAD_ERR_NO_FILE otherwise).
     */
    public function download($method = 'curl')
    {
        // getimagesize() raises a warning and returns false for unreadable or
        // non-image sources; that case is handled explicitly right below.
        $info = @getimagesize($this->source);
        $mime = (is_array($info) && isset($info['mime'])) ? $info['mime'] : '';

        // Sub-type of the MIME string, e.g. "jpeg" for "image/jpeg".
        $type = (string) substr((string) strrchr($mime, '/'), 1);

        // type => [create function, save function, file extension,
        //          default quality (null = the save call takes no quality)]
        $formats = array(
            'jpeg'         => array('imagecreatefromjpeg', 'imagejpeg', 'jpg', 100),
            'png'          => array('imagecreatefrompng',  'imagepng',  'png', 0),
            'bmp'          => array('imagecreatefrombmp',  'imagebmp',  'bmp', null),
            'gif'          => array('imagecreatefromgif',  'imagegif',  'gif', null),
            'vnd.wap.wbmp' => array('imagecreatefromwbmp', 'imagewbmp', 'bmp', null),
            'xbm'          => array('imagecreatefromxbm',  'imagexbm',  'xbm', null),
        );
        // Anything unrecognised is treated as JPEG without an explicit quality.
        $format = isset($formats[$type])
            ? $formats[$type]
            : array('imagecreatefromjpeg', 'imagejpeg', 'jpg', null);
        list($image_create_func, $image_save_func, $new_image_ext, $default_quality) = $format;

        $quality = null;
        if ($default_quality !== null) {
            $quality = isset($this->quality) ? $this->quality : $default_quality;
        }

        // Strip trailing separators so "dir/" does not produce "dir//file",
        // and use a wide random suffix so names created within the same
        // second do not collide.
        $dir = rtrim((string) $this->save_to, '/\\');
        $save_to = $dir . '/blog_insert_temp_' . time() . '_' . mt_rand() . '.' . $new_image_ext;

        $ok = false;
        if ($method === 'curl') {
            $ok = (bool) $this->loadimagecurl($save_to);
        } elseif ($method === 'gd') {
            $img = $image_create_func($this->source);
            if ($img !== false) {
                $ok = ($quality !== null)
                    ? (bool) $image_save_func($img, $save_to, $quality)
                    : (bool) $image_save_func($img, $save_to);
            }
        }

        clearstatcache(true, $save_to);
        $ok = $ok && is_file($save_to);

        // Same layout as a $_FILES entry.
        return array(
            'name'     => basename($this->source),
            'type'     => $mime,
            'size'     => $ok ? (int) filesize($save_to) : 0,
            'tmp_name' => $save_to,
            'error'    => $ok ? UPLOAD_ERR_OK : UPLOAD_ERR_NO_FILE,
        );
    }

    /**
     * Copy $source to a local file using cURL.
     *
     * @param string $save_to Destination path; created or truncated.
     * @return bool True when the transfer succeeded; on failure the partial
     *              file is removed and false is returned.
     */
    public function loadimagecurl($save_to)
    {
        $fp = fopen($save_to, 'wb');
        if ($fp === false) {
            return false;
        }

        $ch = curl_init($this->source);
        if ($ch === false) {
            fclose($fp);
            return false;
        }

        // NOTE(review): $source usually comes from article HTML, i.e. it is
        // untrusted. Consider restricting CURLOPT_PROTOCOLS to HTTP/HTTPS.
        $options = array(
            CURLOPT_FILE           => $fp,
            CURLOPT_HEADER         => 0,
            CURLOPT_FOLLOWLOCATION => 1,
            CURLOPT_TIMEOUT        => 60,   // 1 minute timeout (should be enough)
            CURLOPT_FAILONERROR    => true, // HTTP >= 400 is a failure, not an "image"
        );
        curl_setopt_array($ch, $options);

        $ok = curl_exec($ch) !== false;
        curl_close($ch);
        fclose($fp);

        if (!$ok && is_file($save_to)) {
            unlink($save_to);
        }

        return $ok;
    }
}
下一篇: 一个PHP数组应该有多大的分析