欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页  >  php教程

敏感词过滤

程序员文章站 2022-05-19 18:28:15
...

适用于规模较大的环境 无 /** * 禁词过滤 * 执行效率:每篇用时0.05秒 * @author liuxu * */class Logic_BlackWord{const APP_FORUM= 1;const APP_BLOG= 2;const APP_VOTE= 3;/** * 过滤得到禁词 * @param unknown $txt * @return Ambigous multitype:, unkno

适用于规模较大的环境
/**
 * 禁词过滤
 * 执行效率:每篇用时0.05秒
 * @author liuxu
 *
 */
class Logic_BlackWord
{
	
	const APP_FORUM	= 1;
	const APP_BLOG	= 2;
	const APP_VOTE	= 3;

	/**
	 * Scan a text for banned words and group the hits by type.
	 *
	 * The banned-word list is processed in batches of 1000, with the number
	 * of batches derived from getMax(); the hits of all batches are combined.
	 *
	 * @param string $txt text to check
	 * @return array hits grouped as array(type => array(hit, ...)); empty
	 *               when nothing matched
	 */
	public function getHitList($txt)
	{
		$hitList = array();

		// Filter against the banned words batch by batch.
		$max = $this->getMax();
		if($max)
		{
			$size = 1000;
			$last = ceil($max/$size);
			for($page=1;$page<=$last;$page++)
			{
				$result = $this->getHitListByPage($txt,$page,$size);
				if($result)
				{
					// Copy entry by entry instead of using array_merge():
					// PHP stores all-digit words as integer keys, and
					// array_merge() renumbers integer keys, which would
					// replace such a word with 0, 1, 2, ...
					foreach($result as $hit=>$type)
					{
						$hitList[$hit] = $type;
					}
				}
			}
		}

		// Invert hit => type into type => list of hits.
		$hitList2 = array();
		foreach($hitList as $hit=>$type)
		{
			$hitList2[$type][] = $hit;
		}

		return $hitList2;
	}

	/**
	 * Return the largest banned-word id, reading it through a 300-second cache.
	 *
	 * A value already stored under 'blackWord_max' is returned as-is.
	 * Otherwise MAX(id) is queried from the banned-word model (0 when the
	 * query yields no value), stored for 300 seconds and returned.
	 *
	 * @return mixed the cached value, or the freshly queried maximum
	 */
	private function getMax()
	{
		$cache = Rds::factory();
		$cacheKey = 'blackWord_max';

		$cached = $cache->get($cacheKey);
		if($cached!==false)
		{
			// Anything other than a strict false counts as a cache hit.
			return $cached;
		}

		$model = new Model_BlackWord_BlackWord();
		$rows = $model->search(array('field' => "MAX(id) AS max"));
		$fresh = isset($rows[0]['max']) ? $rows[0]['max'] : 0;

		$cache->setex($cacheKey,300,$fresh);

		return $fresh;
	}

	/**
	 * Scan a text against one batch of banned words.
	 *
	 * Before matching, HTML tags are stripped and every character other than
	 * ASCII letters, digits and U+4E00..U+9FA5 is removed. The cleaned text is
	 * then probed at every character offset with a window of up to 50
	 * characters.
	 *
	 * @param string $txt  text to check
	 * @param number $page batch number, passed on to getWordTreeByPage()
	 * @param number $size batch size, passed on to getWordTreeByPage()
	 * @return array hits of this batch, keyed as returned by
	 *               getHitListByTree() (hit => type)
	 */
	private function getHitListByPage($txt,$page=1,$size=1000)
	{
		$hitList = array();

		// Fetch the word tree for this batch.
		$wordTree = $this->getWordTreeByPage($page,$size);

		$txt = strip_tags($txt);
		$txt = preg_replace('/[^a-zA-Z0-9\x{4e00}-\x{9fa5}]/iu','',$txt);

		$len = mb_strlen($txt,'UTF-8');
		for($i=0;$i<$len;$i++)
		{
			// NOTE(review): the published listing lost part of this loop
			// (the header and the opening of one nested block). The nested
			// block's condition could not be recovered; presumably it was a
			// pre-check before calling the matcher - confirm against the
			// original source. Here the matcher is run at every offset.
			$result = $this->getHitListByTree(mb_substr($txt,$i,50,'UTF-8'),$wordTree);
			if($result)
			{
				foreach($result as $hit=>$type)
				{
					$hitList[$hit] = $type;
				}
			}
		}

		return $hitList;
	}
	
	/**
	 * Match a text fragment against the word tree and return the hits.
	 *
	 * NOTE(review): this listing is damaged. Apparently the text between a
	 * "less than" sign and the next "greater than" sign was stripped as if it
	 * were an HTML tag when the page was extracted. The remainder of this
	 * method's loop and the header of the following method are missing, so
	 * two methods are fused here and the code does not parse. From the cache
	 * lookup onward the code presumably belongs to getWordTreeByPage($page,
	 * $size), which getHitListByPage() calls - confirm against the original
	 * source and restore before use.
	 *
	 * @param str $txt fragment to match; its length is measured in UTF-8 characters
	 * @param arr $wordTree word tree, taken by reference
	 * @return multitype:unknown
	 */
	private function getHitListByTree($txt,&$wordTree)
	{
		$len = mb_strlen($txt,'UTF-8');
		$point = & $wordTree;
		$hit = '';
		$hitList = array();
		// NOTE(review): truncated line. The loop over $txt ends after "$i" and
		// resumes in the middle of a get($memKey) call; the receiver object
		// and the definition of $memKey are not visible.
		for($i=0;$iget($memKey);
		// A strict false from the lookup triggers a rebuild, which is later
		// stored under $memKey for 300 seconds via setex().
		if($wordTree===false)
		{
			$wordTree = array();
			$blackWord = new Model_BlackWord_BlackWord();
			// Bounds of this batch, derived from the page number and size.
			$start = ($page-1)*$size;
			$end = $start + $size;
			// NOTE(review): truncated line. The upper bound of the id filter
			// and the "$result = ..." assignment are missing, which leaves the
			// string literal unterminated. The preg_split() line further down
			// is truncated in the same way (pattern and tree insertion lost).
			$para['where'] = "status=1 AND id>".$start." AND idsearch($para);
			if($result)
			{
				foreach($result as $value)
				{
					if($value['word'])
					{
						$value['word'] = preg_split('/(?setex($memKey,300,$wordTree);
		}

		return $wordTree;
	}

}