The IKAnalyzer Chinese Tokenizer
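The example below calls IKAnalyzer's IKSegmenter API directly to segment a Chinese sentence, producing results for both of IK's modes: smart segmentation and fine-grained segmentation.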
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

public class TestJeAnalyzer {

    // Sample Chinese sentence to segment.
    private static String testString1 = "中国人是最美丽的,钓鱼岛是中国的地盘";

    // Segments the text in both modes and returns the results keyed by mode name.
    public static Map<String, String> segMore(String text) {
        Map<String, String> map = new HashMap<>();
        map.put("智能切分", segText(text, true));    // smart segmentation
        map.put("细粒度切分", segText(text, false)); // fine-grained segmentation
        return map;
    }

    // Runs IKSegmenter over the text; useSmart = true enables smart (coarse) mode,
    // false produces the finest-grained segmentation.
    private static String segText(String text, boolean useSmart) {
        StringBuilder result = new StringBuilder();
        IKSegmenter ik = new IKSegmenter(new StringReader(text), useSmart);
        try {
            Lexeme word = null;
            while ((word = ik.next()) != null) {
                result.append(word.getLexemeText()).append(" ");
            }
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
        return result.toString();
    }

    public static void main(String[] args) throws Exception {
        String testString = testString1;
        System.out.println(segMore(testString));
    }
}
The corresponding IKAnalyzer JAR can be downloaded from the attachment to this article.
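IKAnalyzer also ships a Lucene-facing wrapper class, org.wltea.analyzer.lucene.IKAnalyzer. The following is a minimal sketch of driving the same segmentation through the standard Lucene TokenStream API; it assumes a Lucene-4.x-compatible build of IKAnalyzer with lucene-core on the classpath, and the class name TestLuceneIK and field name "content" are arbitrary choices for this illustration.

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class TestLuceneIK {
    public static void main(String[] args) throws Exception {
        String text = "中国人是最美丽的,钓鱼岛是中国的地盘";
        // true -> smart segmentation; false -> fine-grained segmentation
        Analyzer analyzer = new IKAnalyzer(true);
        TokenStream ts = analyzer.tokenStream("content", new StringReader(text));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset(); // required before iterating tokens
        while (ts.incrementToken()) {
            System.out.print(term.toString() + " ");
        }
        ts.end();
        ts.close();
        analyzer.close();
    }
}

As a rule of thumb, smart mode merges overlapping candidates into a coarser result and is commonly used when analyzing queries, while fine-grained mode emits more, shorter terms and is the usual choice when building an index.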