jieba中文分词
程序员文章站
2022-04-03 14:55:16
...
package com.huaban.analysis.jieba.test;

import java.util.Iterator;
import java.util.List;

import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.SegToken;
import com.huaban.analysis.jieba.JiebaSegmenter.SegMode;

/**
 * Small command-line demo that runs a handful of Chinese sentences through
 * {@link JiebaSegmenter} and prints the results to standard output.
 */
public class Test {

    /**
     * Program entry point; creates a {@code Test} and runs {@link #testDemo()}.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        Test demo = new Test();
        demo.testDemo();
    }

    /**
     * Segments sample sentences with {@code SegMode.INDEX} and prints:
     * <ol>
     *   <li>the token list of every sentence in a fixed sample array,</li>
     *   <li>the token list of one single sentence,</li>
     *   <li>the words of that single sentence in a custom format: every word
     *       that is not exactly one space is written followed by a comma
     *       (so the printed line ends with a comma).</li>
     * </ol>
     * Nothing is printed for step 3 when the segmenter returns no tokens.
     */
    public void testDemo() {
        JiebaSegmenter segmenter = new JiebaSegmenter();

        // Step 1: a batch of sentences, each printed as the raw token list.
        String[] samples = {
            "这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。",
            "我不喜欢日本和服。",
            "雷猴回归人间。",
            "工信处女干事每月经过下属科室都要亲口交代交换机等技术性器件的安装工作",
            "结果婚的和尚未结过婚的"
        };
        for (int i = 0; i < samples.length; i++) {
            List<SegToken> segmented = segmenter.process(samples[i], SegMode.INDEX);
            System.out.println(segmented.toString());
        }

        // Step 2: a single sentence, printed the same way.
        String single = "这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。";
        List<SegToken> singleSegmented = segmenter.process(single, SegMode.INDEX);
        System.out.println(singleSegmented.toString());

        // Step 3: build a custom, comma-terminated rendering of the words.
        List<SegToken> tokens = segmenter.process(single, SegMode.INDEX);
        Iterator<SegToken> cursor = tokens.iterator();
        if (cursor.hasNext()) {
            StringBuilder joined = new StringBuilder();
            do {
                SegToken token = cursor.next();
                // Tokens whose word is a single space are left out of the output.
                if (" ".equals(token.getWord())) {
                    continue;
                }
                joined.append(token.getWord()).append(',');
            } while (cursor.hasNext());
            System.out.println(joined.toString());
        }
    }
}
(PS:原项目已打包上传)
上一篇: 线程池及增长策略和拒绝策略 线程池