欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页  >  IT编程

php权重计算方法代码分享

程序员文章站 2023-12-09 13:20:33
复制代码 代码如下:

复制代码 代码如下:

<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +------------------------------------------------------------------------
//  name       :   权重计算                                        
//  description:   稍加修改,亦可用于分词,词频统计,全文检索和垃圾检测
//  date       :   2013/12/16 08:51

/**
 * Keyword weight calculator backed by a byte-wise trie.
 *
 * A "table" is a list of lines; each line holds words separated by ',' or '|'.
 * Every word is inserted into the trie and tagged with the key of the table
 * line it came from. An "item" is a string that is scanned for those words;
 * the table-line keys of all words found are tallied, and the 'max' rule
 * reports the key(s) with the highest tally for each item.
 *
 * Matching works on bytes (str_split / strlen), not on multibyte characters.
 */
class weight {
    /**
     * Trie nodes indexed by node number; node 0 is the root.
     * Each node maps a single byte to the number of its child node, and may
     * carry an 'acc' entry listing the table-line keys of words ending there.
     */
    protected $adict = array(array());

    /** Items (strings) to be scored, keyed as supplied by the caller. */
    protected $aitems = array();

    /** Rule used to build the currently cached $ashow, or null when there is no cache. */
    protected $slastrule;

    /** Per-item tallies (table-line key => hit count) from the last genshow() run. */
    protected $amatchs = array();

    /** Cached result of the last genshow() run. */
    protected $ashow = array();

    /**
     * Drop the cached match tallies and the cached output so that the next
     * getshow() call recomputes them.
     */
    private function init() {
        // Reset instead of unset(): the declared properties stay defined.
        $this->ashow = array();
        $this->amatchs = array();
        $this->slastrule = null;
    }

    /**
     * Replace the set of items to score.
     *
     * @param mixed $mitems An array of items, or a single item which is wrapped in an array.
     */
    public function newitems($mitems) {
        $this->aitems = is_array($mitems) ? $mitems : array($mitems);
        $this->init();
    }

    /**
     * Import a lookup table and add its words to the dictionary.
     *
     * Words are added to the existing trie; earlier tables are not discarded.
     *
     * @param array $atable Table lines (words separated by ',' or '|'), keyed by the
     *                      identifier that is reported back in the results.
     */
    public function newtable(array $atable) {
        foreach ($atable as $itablekey => $stableline) {
            // '|' and ',' are equivalent separators.
            $awords = explode(',', str_replace('|', ',', $stableline));
            foreach ($awords as $sword) {
                $this->gendict($sword, $itablekey);
            }
        }
        $this->init();
    }

    /**
     * Return the result for the given rule, reusing the cached result when
     * neither the items, the table nor the rule changed since it was built.
     *
     * @param string $srule Selection rule; only 'max' is implemented.
     * @return array Item key => list of table-line keys selected by the rule.
     */
    public function getshow($srule = 'max') {
        if (empty($this->aitems) || empty($this->adict)) {
            return array();
        }
        if (empty($this->ashow) || $srule !== $this->slastrule) {
            return $this->genshow($srule);
        }
        return $this->ashow;
    }

    /**
     * Scan every item, tally the matched table-line keys and apply the rule.
     *
     * @param string $srule Selection rule; for any rule other than 'max' no
     *                      entries are produced.
     * @return array Item key => list of table-line keys with the highest tally
     *               (an empty list when the item matched no word).
     */
    public function genshow($srule) {
        $ashow = array();
        $amatchs = array();
        foreach ($this->aitems as $k => $v) {
            $t = array_count_values($this->matchword($v));
            $amatchs[] = $t;
            if ($srule === 'max') {
                // max() must not be called on an empty array: it warns on
                // PHP 7 and throws ValueError on PHP 8.
                $ashow[$k] = empty($t) ? array() : array_keys($t, max($t), true);
            }
        }
        $this->ashow = $ashow;
        $this->amatchs = $amatchs;
        // Remember the rule so getshow() can tell whether the cache is reusable.
        $this->slastrule = $srule;
        return $ashow;
    }

    /**
     * Insert one word into the trie and tag its final node with $ikey.
     *
     * @param string $mword Word to insert; empty words are ignored.
     * @param mixed  $ikey  Table-line key recorded at the word's final node.
     */
    private function gendict($mword, $ikey = '') {
        $mword = (string) $mword;
        if ($mword === '') {
            // Produced by input such as "a,,b"; there is nothing to index.
            return;
        }
        $iinsertpoint = count($this->adict); // number of the next new node
        $icur = 0;                           // current node, starting at the root
        foreach (str_split($mword) as $ichar) {
            if (isset($this->adict[$icur][$ichar])) {
                // Follow the existing edge.
                $icur = $this->adict[$icur][$ichar];
                continue;
            }
            // Create a new node and link it from the current one.
            $this->adict[$iinsertpoint] = array();
            $this->adict[$icur][$ichar] = $iinsertpoint;
            $icur = $iinsertpoint;
            $iinsertpoint++;
        }
        $this->adict[$icur]['acc'][] = $ikey;
    }

    /**
     * Scan a string for dictionary words and collect their table-line keys.
     *
     * The scan accepts a word as soon as a node carrying 'acc' is reached and
     * then continues right after that word, so matches do not overlap. When
     * a partial match fails, scanning restarts one byte after the position
     * where that attempt began.
     *
     * @param string $sline String to scan.
     * @return array Table-line keys of every matched word, in match order
     *               (a key appears once per match).
     */
    public function matchword($sline) {
        // $iposition is the offset at which the current match attempt started.
        $icur = $ioffset = $iposition = 0;
        // Sentinel byte: forces a partial match pending at the end of the
        // string to fail, so the scan backtracks instead of stopping early.
        $sline .= "\0";
        $ilen = strlen($sline);
        $areturn = array();
        while ($ioffset < $ilen) {
            $schar = $sline[$ioffset];
            if (isset($this->adict[$icur][$schar])) {
                $icur = $this->adict[$icur][$schar];
                if (isset($this->adict[$icur]['acc'])) {
                    $areturn = array_merge($areturn, $this->adict[$icur]['acc']);
                    // Next attempt starts right after the accepted word.
                    $iposition = $ioffset + 1;
                    $icur = 0;
                }
            } else {
                // Dead end: go back to where this attempt started; the
                // increment below moves one byte past it.
                $icur = 0;
                $ioffset = $iposition;
                $iposition = $ioffset + 1;
            }
            ++$ioffset;
        }
        return $areturn;
    }
}

?>

外部调用示例

复制代码 代码如下:

// Strings to be scored.
$aitems = array(
    'chinaisbig',
    'whichisnot',
    'totalyrightforme',
);
// Lookup table: each line lists words separated by ',' or '|'; the array
// index of a line is the key reported in the result.
$atable = array(
    'china,is|small',
    'china,big|me',
    'china,is|big,which|not,me',
    'totaly|right,for,me',
);

// The class defined in this file is named "weight".
$oweight = new weight;
$oweight->newitems($aitems);
// newtable() only builds the dictionary and returns nothing ...
$oweight->newtable($atable);
// ... the result is obtained from getshow(): for each item, the table-line
// key(s) with the most word hits, here array(array(2), array(2), array(3)).
$aresult = $oweight->getshow();