分享一个PHP中文分词
程序员文章站
2024-01-03 23:01:22
...
/**
 * Extract the top-ranked words from a text using the PSCWS4 Chinese word segmenter.
 *
 * The PSCWS4 class file, its dictionary and its rule set are loaded from the
 * relative ./pscws4/ directory (PSCWS4 has to be downloaded and placed there).
 *
 * @param string $text         Text to segment; the segmenter is configured for UTF-8.
 * @param int    $top          Maximum number of words requested from get_tops().
 * @param bool   $return_array Pass exactly false (the default) to get the words joined
 *                             into one string; any other value yields an array.
 * @param string $sep          Separator placed between words in string mode. May be of
 *                             any length, including multi-byte characters.
 *
 * @return string|array The words joined by $sep (an empty string when nothing was
 *                      found), or a list of words (an empty array when nothing was found).
 */
function scws($text, $top = 5, $return_array = false, $sep = ',')
{
    // Load the library only when the class is not defined yet: including the file
    // on every call would redeclare the class and abort on the second call.
    if (!class_exists('pscws4', false)) {
        require_once './pscws4/pscws4.php';
    }

    $cws = new pscws4('utf-8');
    $cws->set_charset('utf-8');
    $cws->set_dict('./pscws4/etc/dict.utf8.xdb');
    $cws->set_rule('./pscws4/etc/rules.utf8.ini');
    // Optional tuning calls the original author left disabled:
    // set_multi(3), set_debug(true), set_duality(true).
    $cws->set_ignore(true);
    $cws->send_text($text);

    // 'r,v,p' is a list of part-of-speech tags handed to PSCWS4 as a filter.
    // NOTE(review): confirm against the PSCWS4 docs whether the list includes or excludes.
    $tops = $cws->get_tops($top, 'r,v,p');

    // Tolerate a non-array result (nothing to report) instead of warning in a loop.
    $words = is_array($tops) ? array_column($tops, 'word') : [];

    // implode() handles separators of any byte length; the previous
    // "prepend separator, then drop one byte" approach only worked for 1-byte separators.
    return false === $return_array ? implode($sep, $words) : $words;
}
// Demo: segment a sample phrase and dump the result returned by scws().
$demoKeywords = scws('青花夔龙纹香炉');
print_r($demoKeywords);