欢迎您访问程序员文章站！本站旨在为大家提供并分享程序员计算机编程知识！
您现在的位置是: 首页  >  IT编程

java 字符串词频统计实例代码

程序员文章站 2023-12-12 18:00:16
复制代码 代码如下:package com.gpdi.action; import java.util.arraylist; import java.util.collec...

复制代码 代码如下:

package com.gpdi.action;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

public class wordsstatistics {

    class obj {
        int count ;
        obj(int count){
            this.count = count;
        }
    }

    public list<wordcount> statistics(string word) {
        list<wordcount> rs = new arraylist<wordcount>();
        map <string,obj> map = new hashmap<string,obj>();

        if(word == null ) {
            return null;
        }
        word = word.tolowercase();
        word = word.replaceall("'s", "");
        word = word.replaceall(",", "");
        word = word.replaceall("-", "");
        word = word.replaceall("\\.", "");
        word = word.replaceall("'", "");
        word = word.replaceall(":", "");
        word = word.replaceall("!", "");
        word = word.replaceall("\n", "");

        string [] wordarray = word.split(" ");
        for(string simpleword : wordarray) {
            simpleword = simpleword.trim(); 
            if (simpleword != null && !simpleword.equalsignorecase("")) {
                obj cnt = map.get(simpleword);
                if ( cnt!= null ) {
                    cnt.count++;
                }else {
                    map.put(simpleword, new obj(1));
                }
            }
        }

        for(string key : map.keyset()) {
            wordcount wd = new wordcount(key,map.get(key).count);
            rs.add(wd);
        }

        collections.sort(rs, new java.util.comparator<wordcount>(){
            @override
            public int compare(wordcount o1, wordcount o2) {
                int result = 0 ;
                if (o1.getcount() > o2.getcount() ) {
                    result = -1;
                }else if (o1.getcount() < o2.getcount()) {
                    result = 1;
                }else {
                    int strrs = o1.getword().comparetoignorecase(o2.getword());
                    if ( strrs > 0 ) {
                        result = 1;
                    }else {
                        result = -1 ;
                    }
                }
                return result;
            }

        });
        return rs;
    }

     
    public static void main(string args[]) {
        string word = "pinterest is might be aa ab aa ab marketer's dream  - ths site is largely used to curate products " ;
        wordsstatistics s = new wordsstatistics();
        list<wordcount> rs = s.statistics(word);
        for(wordcount word1 : rs) {
            system.out.println(word1.getword()+"*"+word1.getcount());
        }
    }

}

上一篇:

下一篇: