php权重计算方法代码分享
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +------------------------------------------------------------------------
// name : 权重计算
// description: 稍加修改,亦可用于分词,词频统计,全文检索和垃圾检测
// date : 2013/12/16 08:51
/**
 * Weight calculator backed by a byte-wise trie.
 *
 * A lookup table is a list of lines; each line holds words separated by ","
 * or "|". Every word is stored in the trie and tagged with the key of the
 * table line it came from. Scanning an item collects the line keys of every
 * word found in it; the "max" rule then reports, per item, the line key(s)
 * that were hit most often.
 *
 * Words and items are processed byte by byte (str_split / string offsets),
 * so multi-byte text is matched on its raw byte sequence.
 */
class weight {
    /**
     * Trie nodes. Index 0 is the root. Each node maps a single byte to the
     * id of its child node; the extra 'acc' entry, when present, lists the
     * table keys of all words that end at this node.
     *
     * @var array
     */
    protected $adict = array(array());

    /** @var array Items to be scanned, keyed as supplied by the caller. */
    protected $aitems = array();

    /** @var string|null Rule used to build the cached result in $ashow. */
    protected $slastrule;

    /** @var array Per-item hit counts (table key => count) of the last run. */
    protected $amatchs = array();

    /** @var array Cached result of the last genshow() run. */
    protected $ashow = array();

    /**
     * Drop every cached result so the next getshow() recomputes it.
     *
     * @return void
     */
    private function init() {
        // Reset (rather than unset) so the declared properties keep existing
        // and keep their array type.
        $this->ashow = array();
        $this->amatchs = array();
        $this->slastrule = null;
    }

    /**
     * Replace the items to be scanned.
     *
     * @param mixed $mitems A single item or an array of items.
     * @return void
     */
    public function newitems($mitems) {
        $this->aitems = (is_array($mitems)) ? $mitems : array($mitems);
        $this->init();
    }

    /**
     * Import a lookup table and add its words to the dictionary.
     *
     * Words are added on top of whatever the dictionary already contains;
     * nothing is removed by this call.
     *
     * @param array $atable Table key => line of words separated by "," or "|".
     * @return void
     */
    public function newtable(array $atable) {
        foreach ($atable as $itablekey => $stableline) {
            // "|" and "," are equivalent separators.
            $awords = explode(',', str_replace('|', ',', $stableline));
            foreach ($awords as $sword) {
                $this->gendict($sword, $itablekey);
            }
        }
        $this->init();
    }

    /**
     * Return the result for the current items, reusing the cached result
     * when it was built with the same rule.
     *
     * @param string $srule Scoring rule; only 'max' produces output.
     * @return array Item key => list of table keys selected by the rule.
     */
    public function getshow($srule = 'max') {
        if (empty($this->aitems) || empty($this->adict)) {
            return array();
        }
        if (empty($this->ashow) || $srule !== $this->slastrule) {
            return $this->genshow($srule);
        }
        return $this->ashow;
    }

    /**
     * Scan every item, store the hit counts and build the result.
     *
     * @param string $srule Scoring rule; only 'max' produces output.
     * @return array Item key => list of table keys with the highest hit
     *               count (an empty list when the item matched no word).
     */
    public function genshow($srule) {
        $ashow = array();
        $amatchs = array();
        foreach ($this->aitems as $k => $sitem) {
            $acounts = array_count_values($this->matchword($sitem));
            $amatchs[] = $acounts;
            if ($srule === 'max') {
                // max() must not be called on an empty array (it raises a
                // ValueError on PHP 8), so an item without hits gets an
                // empty list.
                $ashow[$k] = empty($acounts)
                    ? array()
                    : array_keys($acounts, max($acounts), true);
            }
        }
        $this->ashow = $ashow;
        $this->amatchs = $amatchs;
        // Remember the rule so getshow() can serve the cached result.
        $this->slastrule = $srule;
        return $ashow;
    }

    /**
     * Insert one word into the trie and tag its end node with a table key.
     *
     * @param string $mword Word to insert; empty words are ignored.
     * @param mixed  $ikey  Table key recorded at the word's end node.
     * @return void
     */
    private function gendict($mword, $ikey = '') {
        $mword = (string) $mword;
        if ($mword === '') {
            // An empty word can never be matched; skipping it also avoids
            // the version-dependent result of str_split('').
            return;
        }
        $iinsertpoint = count($this->adict); // id of the next new node
        $icur = 0; // current node id, starting at the root
        foreach (str_split($mword) as $schar) {
            if (isset($this->adict[$icur][$schar])) {
                $icur = $this->adict[$icur][$schar];
                continue;
            }
            $this->adict[$iinsertpoint] = array();
            $this->adict[$icur][$schar] = $iinsertpoint;
            $icur = $iinsertpoint;
            $iinsertpoint++;
        }
        $this->adict[$icur]['acc'][] = $ikey;
    }

    /**
     * Scan a string and collect the table keys of every dictionary word
     * found in it.
     *
     * Matching is greedy-shortest: as soon as the current path reaches a
     * node that ends a word, that word is accepted and scanning restarts
     * right after it. When a partial match fails, scanning restarts one
     * byte after the position where that attempt began.
     *
     * @param string $sline Text to scan.
     * @return array Table keys of all matched words, one entry per tag.
     */
    public function matchword($sline) {
        // $iposition is the offset at which the current attempt started.
        $icur = $ioffset = $iposition = 0;
        // Trailing NUL sentinel: if the text ends in the middle of a word,
        // the sentinel triggers the mismatch branch so the scan backtracks
        // instead of stopping.
        $sline .= "\0";
        $ilen = strlen($sline);
        $areturn = array();
        while ($ioffset < $ilen) {
            $schar = $sline[$ioffset];
            if (isset($this->adict[$icur][$schar])) {
                $icur = $this->adict[$icur][$schar];
                if (isset($this->adict[$icur]['acc'])) {
                    $areturn = array_merge($areturn, $this->adict[$icur]['acc']);
                    // Word accepted: the next attempt starts after it.
                    $iposition = $ioffset + 1;
                    $icur = 0;
                }
            } else {
                // Mismatch: rewind to the start of this attempt; together
                // with the increment below the scan resumes one byte later.
                $icur = 0;
                $ioffset = $iposition;
                $iposition = $ioffset + 1;
            }
            ++$ioffset;
        }
        return $areturn;
    }
}
?>
外部调用示例
// Items to classify.
$aitems = array(
    'chinaisbig',
    'whichisnot',
    'totalyrightforme',
);
// Lookup table: each line is a group of words separated by "," or "|".
$atable = array(
    'china,is|small',
    'china,big|me',
    'china,is|big,which|not,me',
    'totaly|right,for,me',
);
// The class defined in this file is named "weight".
$oweight = new weight;
$oweight->newitems($aitems);
$oweight->newtable($atable);
// newtable() returns nothing; the per-item result comes from getshow().
$aresult = $oweight->getshow();