欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页  >  后端开发

PHP制作百度词典查词采集器_PHP

程序员文章站 2022-06-01 18:33:53
...
PHP制作百度词典查词采集器_PHP

百度dict 采集样本

写的采集百度dict词典翻译后的所有结果数据,当然附带了13.5w单词库和采集简单的案例,这里我把写出的主要类dict.class.php放出来,项目地址http://github.com/widuu/baidu_dict,有需要的直接fork就可以了~么么哒,这东西用的人很少,所以有用的兄弟拿走了哈~

 音标
	 *				"pro"	 => 发音
	 *				"example"=> 例句
	 *				"explain"=> 简明释义
	 *				"synonym"=> 同反义词
	 *				"phrase" => 短语数组
	 *			)
   *
	 */
	public function content($word){
		// Store the word on the instance; getContent() reads it when building the request URL.
		$this->word = $word;

		// Array elements are evaluated top to bottom, so the individual scrapers
		// run in this order: symbol, pro, example, explain, synonym, phrase.
		return array(
			"symbol"  => $this->Pronounced(),	// phonetic symbols
			"pro"     => $this->getSay(),		// pronunciation
			"example" => $this->getExample(),	// example sentences
			"explain" => $this->getExplain(),	// concise definitions
			"synonym" => $this->getSynonym(),	// synonyms / antonyms
			"phrase"  => $this->getPhrase(),	// phrases
		);
	}


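	/**
	 * Fetch the Baidu dictionary result page for the current word using cURL.
	 *
	 * @return string|false the response body, or false when the transfer fails
	 */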

	private function getContent(){
		$useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0";

		// URL-encode the word so spaces and non-ASCII characters still form a valid query string.
		$url = "http://dict.baidu.com/s?wd=".urlencode((string) $this->word);

		$ch = curl_init();
		curl_setopt_array($ch, array(
			CURLOPT_URL            => $url,
			CURLOPT_USERAGENT      => $useragent,
			CURLOPT_RETURNTRANSFER => TRUE,	// return the body instead of printing it
			CURLOPT_FOLLOWLOCATION => 1,
			CURLOPT_HTTPGET        => 1,
			CURLOPT_AUTOREFERER    => 1,
			CURLOPT_HEADER         => 0,	// body only, no response headers
			CURLOPT_TIMEOUT        => 30,
		));

		$result = curl_exec($ch);

		// The error state lives on the handle $ch. The previous code inspected an
		// undefined $curl variable, so transfer errors were never reported.
		if (curl_errno($ch)) {
			echo 'Errno'.curl_error($ch);
		}
		curl_close($ch);

		// String body on success, false when curl_exec() failed.
		return $result;
	}


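	/**
	 * Extract the phonetic symbols for the current word from the Baidu result page.
	 *
	 * @return array array('en' => British, 'us' => American)
	 */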

	private function Pronounced(){
		// Downloads the result page for $this->word (a fresh request on every call).
		$data = $this -> getContent();
		// Captures the text following each `"EN-US">` marker in the page.
		// NOTE(review): as written the pattern ends in `\/Ui`, i.e. the only closing
		// "/" is escaped, so PCRE sees no ending delimiter. A closing tag appears to
		// have been lost when this listing was extracted — confirm against the
		// upstream source before relying on this method.
		preg_match_all("/\"EN\-US\"\>(.*)\/Ui",$data,$pronounced);
		// First capture is reported as 'en', second as 'us'. Neither index is
		// checked, so fewer than two matches triggers an undefined-offset notice.
		return array(
			'en' => $pronounced[1][0],
			'us' => $pronounced[1][1]
		);
	}

	/**
	 * 获取百度翻译发音
	 * return array(英,美)
	 *
	 */

	private function getSay(){
		// Downloads the result page for $this->word (a fresh request on every call).
		$data = $this -> getContent();

		// Collect every url="..." attribute value; the first two matches are
		// reported as the 'en' and 'us' entries respectively.
		preg_match_all("/url=\"(.*)\"/Ui",$data,$pronounced);

		// Guard both offsets: when the page has fewer than two matches (failed
		// request, unknown word) the entry is null instead of raising an
		// undefined-offset notice/warning.
		return array(
			'en' => isset($pronounced[1][0]) ? $pronounced[1][0] : null,
			'us' => isset($pronounced[1][1]) ? $pronounced[1][1] : null
		);
	}

	/**
   * 获取百度翻译例句
   * return array() 多维数组 例句
   * 
	 */

	private function getExample(){
		// Downloads the result page for $this->word (a fresh request on every call).
		$data = $this -> getContent();

		// The examples are embedded in the page as a JavaScript literal:
		//   var example_data = [[[ ... ]]];
		preg_match_all("/var example_data = (.*)\]\;/Us",$data,$example);
		if (!isset($example[1][0]) || $example[1][0] === '') {
			// Request failed or the page carries no examples: nothing to pair up.
			return array();
		}

		// Normalise the leading brackets, then split the literal into its
		// top-level "[[[" groups and each group into its "[[" rows.
		$groups = explode("[[[", "[[[".ltrim($example[1][0],"["));

		$sentences = array();
		foreach ($groups as $group) {
			foreach (explode("[[", "[[".$group) as $row) {
				// Each token looks like ["text", ...]; keep only the quoted text.
				preg_match_all("/\[\"(.*)\",/Us","[".$row, $match);
				if(!empty($match[1])){
					// implode(separator, array): the legacy reversed argument order
					// used previously was removed in PHP 8.0. Collecting into an
					// array also avoids the old "@"-joined string, which split any
					// sentence that itself contained "@".
					$sentences[] = implode(" ", $match[1]);
				}
			}
		}

		// Consecutive sentences are returned two at a time.
		// NOTE(review): presumably each pair is (sentence, translation) — confirm
		// against the live page format.
		return array_chunk($sentences, 2);
	}

	/**
   * 获取简明释义
   * return array (x => "词性",b => "附属")
   * 
	 **/

	private function getExplain(){
		$data = $this -> getContent();
		preg_match_all("/id\=\"en\-simple\-means\"\>(.*)\
/Us",$data,$explain); $r_data = $explain[1][0]; preg_match_all("/\

\(?P.*)\\(?P.*)\\/Us", $r_data, $a_data); preg_match_all("/\(?P[^\>]+)\:\(?P.*)\\/Us", $r_data, $b_data); $result = array(); foreach ($a_data["adj"] as $key => $value) { $result[$value] = $a_data["name"][$key]; } $word_b = array(); foreach ($b_data["tag"] as $key => $value) { $word_b[$value] = strip_tags($b_data["word"][$key]); } $result_data = array("x" => $result,"b" => $word_b); return $result_data; } /** * 获取同义词 * return array(0 => "同义词", 1 => "反义词") 一般为多维数组 * */ private function getSynonym(){ $data = $this -> getContent(); preg_match_all("/id=\"en\-syn\-ant\"\>(.*)

/Us",$data,$synonym); $content = $synonym[1][0]; $data1 = explode("", $content); $result = array(); $data2 = array(); foreach ($data1 as $key => $value) { preg_match_all("/\(?P.*)\&nbsp\;\\\
\
    (?.*)\/Us", $value, $r_data); $data2[$key]["adj"] = $r_data["adj"]; $data2[$key]["content"] = $r_data["content"]; } foreach ($data2 as $key => $value) { foreach ($value["content"] as $k => $v) { if(!empty($v)){ preg_match_all("/\
  • \

    (?P

    .*)\(?P<value>.*)\/Us", $v, $v_data); foreach ($v_data['title'] as $m => $d) { $data = strip_tags(preg_replace(">"," ", $v_data["value"][$m])); $result[$key][$value["adj"][$k]][$d] = $data; } } } } return $result; } /** * 获取短语词组 * return array (key => value) 一维或者多维数组 * */ private function getPhrase(){ $num = self::$num; $data = $this -> getContent(); preg_match_all("/id=\"en\-phrase\"\>(.*)\<div class>/Us",$data,$phrase); $data = explode("",$phrase[1][0]); $data1 = array_slice($data,0,$num); $result = array(); foreach ($data1 as $key => $value) { $data2 = explode("", $value); $n = count($data2); if($n $value) { foreach ($value as $k => $v) { $value[$k] = strip_tags($v); } $array = array($result[$key_value],$value); if (array_key_exists($key_value, $result)){ $result[$key_value] = $array; } } } } return $result; } /** * 将数组转换为字符串 * * @param array $data 数组 * @param bool $isformdata 如果为0,则不使用new_stripslashes处理,可选参数,默认为1 * @return string 返回字符串,如果,data为空,则返回空 */ private function array2string($data, $isformdata = 1) { if($data == '') return ''; if($isformdata) $data = $this->new_stripslashes($data); return addslashes(var_export($data, TRUE)); } /** * 返回经stripslashes处理过的字符串或数组 * @param $string 需要处理的字符串或数组 * @return mixed */ private function new_stripslashes($string) { if(!is_array($string)) return stripslashes($string); foreach($string as $key => $val) $string[$key] = $this->new_stripslashes($val); return $string; } } // $word = new dict("express"); // $word ->content(); <p>以上就是本文的全部内容了,非常实用的功能,希望小伙伴们能够喜欢。</p> <a href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL2suaHRtbD90PTI=" target="_blank" rel="nofollow"><img onerror="this.src='/statics/superweb999/images/image_error.jpg'"src="/default/index/img?u=aHR0cHM6Ly9pbWcucGhwLmNuL3VwbG9hZC9jb3Vyc2UvMDAwLzAwMC8wNzEvNjIwYzYwNGY3NTI1MTk5NS5wbmc=" style="margin-top: 30px;" alt="PHP制作百度词典查词采集器_PHP" title="PHP制作百度词典查词采集器_PHP"></a><p style="float:right;font-size:13px;color:#999;"><span 
class="red">声明:</span>本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn核实处理。</p> </div> <p class="article-relative-header">相关文章</p> <p class="article-relative-header">相关视频</p> <hr class="layui-clear"> <ul class="article-relative-ul"> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL3BocC13ZWl6aWppYW9jaGVuZy00ODg5MTkuaHRtbA==" target="_blank" rel="nofollow">教你使用PHP数据库迁移工具“Phinx”</a> </li> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL3BocC13ZWl6aWppYW9jaGVuZy00ODg4ODkuaHRtbA==" target="_blank" rel="nofollow">详解win10下PHP的安装配置(以php5.6为...</a> </li> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL3BocC13ZWl6aWppYW9jaGVuZy00ODg4MzUuaHRtbA==" target="_blank" rel="nofollow">php Swoole实现毫秒定时计划任务(详解)</a> </li> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL3BocC13ZWl6aWppYW9jaGVuZy0zNzk2My5odG1s" target="_blank" rel="nofollow">PHP 采集程序原理分析篇_php技巧</a> </li> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL3BocC13ZWl6aWppYW9jaGVuZy01NDIxNS5odG1s" target="_blank" rel="nofollow">PHP制作百度词典查词采集器_PHP</a> </li> </ul> <ul class="article-relative-ul"> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL2NvZGUvMi5odG1s" target="_blank" title="PHP开发基础教程之简介" rel="nofollow">PHP开发基础教程之简介</a> </li> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL2NvZGUvNC5odG1s" target="_blank" title="PHP新手入门之简介" rel="nofollow">PHP新手入门之简介</a> </li> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL2NvZGUvNS5odG1s" target="_blank" title="PHP开发基础教程之环境搭建" 
rel="nofollow">PHP开发基础教程之环境搭建</a> </li> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL2NvZGUvNi5odG1s" target="_blank" title="php学习路线介绍" rel="nofollow">php学习路线介绍</a> </li> <li> <span class="layui-badge-dots"></span><a class="relevant" href="/default/index/url?u=aHR0cHM6Ly93d3cucGhwLmNuL2NvZGUvNy5odG1s" target="_blank" title="php环境配置" rel="nofollow">php环境配置</a> </li> </ul></value>

专题推荐

作者信息
PHP制作百度词典查词采集器_PHP

认证0级讲师

推荐视频教程
  • PHP制作百度词典查词采集器_PHPjavascript初级视频教程
  • PHP制作百度词典查词采集器_PHPjquery 基础视频教程
  • 视频教程分类