欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  后端开发

路径中包含汉语言时,PHP解析失败

程序员文章站 2022-06-17 12:14:40
...
路径中包含中文时,PHP解析失败

header("Content-Type: text/html; charset=utf-8");
//static $count=0;

//抓包,识别ttx扩展名的文件,提取前100个字节,并插入数据库
function read100Bytes(&$extAndDir)
{
$conn=new mysqli("localhost","root","cai123","test");
if(mysqli_connect_errno())
{
die(mysqli_connect_error());
}
$conn->query("set names utf8");
$sql="insert into spiderTXT(file_name,file_link,file_100_byte) values(?,?,?)";
$stmt=$conn->prepare($sql);
$stmt->bind_param("sss",$file_name,$file_link,$file_100_byte);


$textFile=$extAndDir["text"];

foreach($textFile as $link)
{
if(strtolower(pathinfo($link)["extension"])=="txt")
{
if(file_exists($link) && is_readable($link))
{
$fd=fopen($link,"r") or die("打开文件失败".__line__);
$content=fread($fd,100);
//return $content;
$file_name=pathinfo($link)["basename"];
$file_link=pathinfo($link)["dirname"];
$file_100_byte=$content;
$stmt->execute();
$stmt->close();
fclose($fd);
}
else
{
die("文件不存在或者不可读".__line__);
}

}

}



$conn->close();
}
//根据扩展名分类
function extFilter(&$extAndDir,&$fileList,&$extFormat)
{
$ext=$extAndDir[0];
$link=$extAndDir[1];
if(in_array(strtolower($ext),$extFormat["img"],true))
{
array_push($fileList["img"],$link);
}
else if(in_array(strtolower($ext),$extFormat["video"],true))
{
array_push($fileList["video"],$link);
}
else if(in_array(strtolower($ext),$extFormat["sound"],true))
{
array_push($fileList["sound"],$link);
}
else if(in_array(strtolower($ext),$extFormat["text"],true))
{
array_push($fileList["text"],$link);
}
else if(in_array(strtolower($ext),$extFormat["pack"],true))
{
array_push($fileList["pack"],$link);
}
else if(in_array(strtolower($ext),$extFormat["execute"],true))
{
array_push($fileList["execute"],$link);
}
else
{
array_push($fileList["other"],$link);
}
}

//提取路径名,因为使用dirname无法提取完整的路径名,所以写了这个
function getDirName($path)
{
$pat="#(\.\w{2,3})$#";
//如果传的是路径+文件如 c:\sys.ini, 则提取路径名
if(preg_match($pat,$path,$match))
{
//echo "extension name: ".$match[1];
$pat2="#^(.*)/(\w{1,100}\.\w{2,3})$#i";
if(preg_match($pat2, $path,$mat))
{
return $mat[1];
}
else
{
echo "not match".__line__."
";
}
}
else
{
return $path;
}
}


//读取路径,并根据文件的扩展名分类
function readFileList($path,&$extFormat)
{
static $fileList=array("img"=>array(), "video"=>array(), "sound"=>array(),"text"=>array(),"pack"=>array(),"execute"=>array(),"other"=>array());
$encoding=mb_detect_encoding($path,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));
//echo $encoding;
//exit();
//if (in_array($encoding,array("cp936","gbk" ,"gb2312"),true))
//{
$path=iconv($encoding,"utf-8",$path);
//}

$dirName=getDirName($path);
//$dirName=dirname($path);

//根据文件夹的名字,命名数组的关键字
/* $pat="#/(.+)$#i";
preg_match($pat, $dirName,$match);
$match=explode("/",$match[1]);
$packs=array_pop($match);
$fileList["$packs"]=null;
*/
if ($fd=opendir($dirName))
{
while($fileName=readdir($fd))
{
$encoding=mb_detect_encoding($fileName,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));
//echo $encoding;
//exit();
//if (in_array($encoding,array("cp936","gbk" ,"gb2312"),true))
//{
$fileName=iconv($encoding,"utf-8",$fileName);
//}

if($fileName !="." && $fileName !="..")
{

if(is_file($dirName.'/'.$fileName))
{
@$extName=pathinfo($path."/".$fileName)["extension"];
$linkName=$dirName."/".$fileName;

$extAndDir=array($extName,$linkName);
extFilter($extAndDir,$fileList,$extFormat);
//read100Bytes($extAndDir);

}
else if(is_dir($dirName.'/'.$fileName))
{
//$fileName=iconv("gb2312","utf-8",$fileName);
readFileList($dirName.'/'.$fileName,$extFormat);
//echo $dirName.'/'.$fileName."
";
//die("读取目录出错");
}
}

}
return $fileList;
}
@closedir($fd);
}
//迭代数据
function iter(&$arr)
{
if(!is_array($arr))
return;
//$num=count($arr);
$list=array();
//echo "length".$num;
foreach($arr as $name=>$val)
{
foreach($val as $item)
array_push($list,$item);
}
return $list;
}

//组装批量插入数据
function mysqliInsert(&$arr)
{
$conn= new MySQLi("localhost", "root", "cai123", "test");
if(!$conn)
{
die("连接服务器失败".$conn->error);
}
$conn->query("set names utf8");
$sql="INSERT INTO img(link) VALUES";
foreach ($arr as $key=>$val)
{
foreach($val as $ads)
{
$sql .= "('".$ads."'),";

}
}
//去掉最后一个逗号,
$sql=substr($sql,0, strlen($sql)-1);
//echo $sql;

$res = $conn->query($sql);

if(!$res)
{
die("插入数据库失败".$conn->error);
}
else
{
if($conn->affected_rows>0)
{
exit();
}
else
{
die("没有产生影响");
}
}
$conn->close();

}
//预处理批量插入数据
function stmtInsert(&$arr)
{
$conn= new mysqli("localhost", "root", "cai123", "test");
if(mysqli_connect_errno())
{
die(mysqli_connect_error());
}
$conn->query("set names utf8");
$sql="insert into img(link) values(?)";
$stmt=$conn->prepare($sql);
$stmt->bind_param("s",$items);

foreach($arr as $key=>$links)
{
foreach ($links as $item)
{
$items=$item;
$b=$stmt->execute();
if(!$b)
{
die($conn->error);
}
}
}
$stmt->close();
$conn->close();
}
//
function showFile()
{
$conn= new MySQLi("localhost", "root", "cai123", "test");
if(!$conn)
{
die("连接服务器失败".$conn->error);
}
$conn->query("set names utf8");
$sql=" SELECT * FROM img GROUP BY link";
$res=$conn->query($sql);
if(!$res)
{
die("查询失败");
}
while($row=$res->fetch_assoc())
{
echo "路径中包含汉语言时,PHP解析失败
";
}
$res->free();
}

//转换window环境下路径的默认分隔符\为PHP识别更好的/
function transPathSep($path)
{
//$encoding=mb_detect_encoding($path,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));
//$path=iconv($encoding,"utf-8",$path);

$system=$_SERVER["SERVER_SOFTWARE"];
$pat="#\((.*?)\)#";
$sysVer=null;
if(preg_match($pat,$system,$match))
{
$sysVer=$match[1];
}
else
{
die("匹配系统类型失败");
}
if(strtolower($sysVer)=="win32")
{
$realPath=str_replace("\\","/",$path);
return $realPath;
}
}

//static $fileList=array("img"=>array(), "video"=>array(), "sound"=>array(),"text"=>array(),"pack"=>array());
//在此处可以决定把什么类型的文件,归类到哪种类型
$imgFormat=array("img"=>array("jpg", "jpeg","png", "bmp","gif","ico"),
"video"=>array("avi","dvix","div","xvid","mpge","mpg","dat","wmv","asx","rm","rmvb","mov","qt","3gp","3g2","mp4","m4v","flv"),
"sound"=>array("mp3","wav","wma","ape","mod","aiff","voc","vov","asf"),
"text"=>array("word","txt","pdf","chm","ppt","ini","html","css","js"),
"pack"=>array("rar" ,"zip","7z","cab","arj","lzh","tar","gz","ace","uue","bz2","jar","iso","mpq"),
"execute"=>array("exe","bat","msi"));



/* 遇到一个问题,当路径中出现中文时,会提示打开目录失败,且显示为乱码,比如$dir="E:\Books\php\php 程序设计";时会出现,CSDN的可用分只有33了..............蛋疼
*/
$dir="C:\Windows\Boot";

$path=transPathSep($dir);

//echo pathinfo($path)["dirname"];
$arr=readFileList($path,$imgFormat);

echo "
";
print_r($arr);
echo "
";
//read100Bytes($arr);

echo "
OK";
?>




------解决思路----------------------
你这样能不出错吗?
while($fileName=readdir($fd))
{
$encoding=mb_detect_encoding($fileName,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));
$fileName=iconv($encoding,"utf-8",$fileName); //非utf-8的文件名被转成了utf-8的,那么文件还能存在吗?

if($fileName !="." && $fileName !="..")
{

if(is_file($dirName.'/'.$fileName))
路径中包含汉语言时,PHP解析失败

声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn核实处理。

相关文章

相关视频