欢迎您访问程序员文章站,本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

IKAnalyzer分词器

程序员文章站 2024-02-21 22:33:46
...
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;

import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;


/**
 * Small demo that runs the IK Analyzer {@link IKSegmenter} over a piece of text with the
 * {@code useSmart} flag both enabled and disabled, and prints the two results.
 */
public class TestJeAnalyzer {

    /** Sample sentence segmented by {@link #main(String[])}. */
    private static final String testString1 = "中国人是最美丽的,钓鱼岛是中国的地盘";

    /**
     * Segments {@code text} twice: once with {@code useSmart = true} and once with
     * {@code useSmart = false}.
     *
     * @param text the text to segment; must not be {@code null}
     * @return a map with two entries, in this iteration order: {@code "智能切分"} (result with
     *     {@code useSmart = true}) followed by {@code "细粒度切分"} (result with
     *     {@code useSmart = false})
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static Map<String, String> segMore(String text) {
        // LinkedHashMap keeps insertion order, so the printed output is stable.
        Map<String, String> map = new LinkedHashMap<>();
        map.put("智能切分", segText(text, true));
        map.put("细粒度切分", segText(text, false));
        return map;
    }

    /**
     * Runs the segmenter over {@code text} and concatenates the text of every lexeme it yields.
     *
     * @param text the text to segment; must not be {@code null}
     * @param useSmart forwarded unchanged to the {@link IKSegmenter} constructor
     * @return every lexeme text followed by a single space (so a non-empty result ends with a
     *     trailing space); the empty string when the segmenter yields no lexemes
     * @throws NullPointerException if {@code text} is {@code null}
     * @throws RuntimeException wrapping the {@link IOException} if the segmenter fails to read
     */
    private static String segText(String text, boolean useSmart) {
        Objects.requireNonNull(text, "text");
        StringBuilder result = new StringBuilder();
        IKSegmenter ik = new IKSegmenter(new StringReader(text), useSmart);
        try {
            Lexeme word;
            // next() returns null once the input is exhausted.
            while ((word = ik.next()) != null) {
                result.append(word.getLexemeText()).append(" ");
            }
        } catch (IOException ex) {
            throw new RuntimeException("Failed to segment text", ex);
        }
        return result.toString();
    }

    /**
     * Segments the built-in sample sentence and prints the resulting map to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String testString = testString1;
        System.out.println(segMore(testString));
    }
}

 

 

对应的JAR包可在附件中下载。

相关标签: 分词