Java 字符串词频统计实例代码
程序员文章站
2023-12-12 18:00:16
复制代码 代码如下:package com.gpdi.action; import java.util.ArrayList; import java.util.Collec...
复制代码 代码如下:
package com.gpdi.action;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Counts how often each word occurs in a piece of text and reports the words
 * ordered by descending frequency, ties broken alphabetically (ignoring case).
 *
 * <p>NOTE(review): the result type {@code wordcount} and its members
 * ({@code wordcount(String, int)}, {@code getword()}, {@code getcount()}) are
 * declared outside this file. Their spelling is kept exactly as referenced
 * here -- confirm against that declaration.
 */
public class wordsstatistics {

    /**
     * Literal fragments removed from the text before it is split into words.
     * Order matters: the possessive {@code 's} must be stripped before the
     * bare apostrophe, otherwise a trailing "s" would be left behind.
     */
    private static final String[] STRIPPED_FRAGMENTS = {"'s", ",", "-", ".", "'", ":", "!"};

    /** Mutable tally stored in the map so a count can be bumped without re-inserting the key. */
    private static final class Counter {
        int count;

        Counter(int count) {
            this.count = count;
        }
    }

    /**
     * Computes the frequency of every word in {@code word}.
     *
     * <p>The text is lower-cased, the fragments in {@link #STRIPPED_FRAGMENTS}
     * are removed, and the remainder is split on runs of whitespace (spaces,
     * tabs and line breaks all separate words).
     *
     * @param word the text to analyse; may be {@code null}
     * @return one entry per distinct word, highest count first and words with
     *         equal counts in case-insensitive alphabetical order; {@code null}
     *         when {@code word} is {@code null}
     */
    public List<wordcount> statistics(String word) {
        if (word == null) {
            return null;
        }

        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        String text = word.toLowerCase(java.util.Locale.ROOT);
        for (String fragment : STRIPPED_FRAGMENTS) {
            // Literal replacement: none of these fragments is meant as a regex.
            text = text.replace(fragment, "");
        }

        Map<String, Counter> tallies = new HashMap<String, Counter>();
        for (String token : text.split("\\s+")) {
            // Leading whitespace yields one empty leading token; skip it.
            if (token.isEmpty()) {
                continue;
            }
            Counter tally = tallies.get(token);
            if (tally == null) {
                tallies.put(token, new Counter(1));
            } else {
                tally.count++;
            }
        }

        List<wordcount> rs = new ArrayList<wordcount>(tallies.size());
        for (Map.Entry<String, Counter> entry : tallies.entrySet()) {
            rs.add(new wordcount(entry.getKey(), entry.getValue().count));
        }

        Collections.sort(rs, new java.util.Comparator<wordcount>() {
            @Override
            public int compare(wordcount o1, wordcount o2) {
                // Higher counts sort first.
                if (o1.getcount() > o2.getcount()) {
                    return -1;
                }
                if (o1.getcount() < o2.getcount()) {
                    return 1;
                }
                // Equal counts: alphabetical order; returns 0 for equal words so
                // that compare(x, x) == 0 as the Comparator contract requires.
                return o1.getword().compareToIgnoreCase(o2.getword());
            }
        });
        return rs;
    }

    /**
     * Demo entry point: prints {@code word*count} for each word of a sample sentence.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        String word = "pinterest is might be aa ab aa ab marketer's dream - ths site is largely used to curate products " ;
        wordsstatistics s = new wordsstatistics();
        List<wordcount> rs = s.statistics(word);
        for (wordcount word1 : rs) {
            System.out.println(word1.getword() + "*" + word1.getcount());
        }
    }
}