欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

Java中的StringBuilder性能测试

程序员文章站 2024-03-01 09:49:34
在看kmp算法时,想要简单的统计一下执行时间和性能。 得出的结论是: java的string的indexof方法性能最好,其次是kmp算法,其次是传统的bf算法,当然,对...

在看kmp算法时,想要简单的统计一下执行时间和性能。

得出的结论是: java的string的indexof方法性能最好,其次是kmp算法,其次是传统的bf算法,当然,对比有点牵强,sun的算法也使用java来实现、用的看着不像是kmp,还需要详细研究一下。

测试代码如下所示:

package com.test.test.kmp;

import java.util.random;

public class kmptest {

	public static void main(string[] args) {
		test();
	}
	
	//
	public static void test() {
		//
		int alllen = 8000000;
		int sublen = 11;
		int charlen = 4;
		string cl = buildstring(charlen);
		int times = 50;
		//
		testmatch(alllen, sublen, cl, times);
	}
	
	public static void testmatch(int alllen, int sublen, string cl, int times){
		int allbf = 0;
		int allstring = 0;
		int allkmp = 0;
		int allgc = 0;
		int allbuild = 0;
		int alltoarray = 0;
		
		long start = system.currenttimemillis();
		
		system.out.println("--------------------------");
		for (int i = 0; i < times; i++) {
			// 1. 构造字符串的损耗
			long buildstart = system.currenttimemillis();
			string sub = buildstring(sublen, cl);
			string all = buildstring(alllen, sub) +sub;
			long buildend = system.currenttimemillis();
			allbuild += (buildend - buildstart);
			// 2. tochararray的损耗
			long toarraystart = system.currenttimemillis();
			char[] allarray = all.tochararray();
			char[] subarray = sub.tochararray();
			long toarrayend = system.currenttimemillis();
			alltoarray += (toarrayend - toarraystart);
			//
			long startbf = system.currenttimemillis();
			int indexbf = bfindexof(all, sub);
			long endbf = system.currenttimemillis();
			//
			long timeoutbf = endbf - startbf;
			allbf += timeoutbf;
			system.gc();
			allgc += (system.currenttimemillis() - endbf);
	
			//
			long startstring = system.currenttimemillis();
			int indexstring = stringindexof(all, sub);
			long endstring = system.currenttimemillis();
			//
			long timeoutstring = endstring - startstring;
			allstring += timeoutstring;
			system.gc();
			allgc += (system.currenttimemillis() - endstring);
			

			//
			long startkmp = system.currenttimemillis();
			//int indexkmp = kmpindexof(all, sub);
			int indexkmp = kmp_index(allarray, subarray);
			long endkmp = system.currenttimemillis();
			//
			long timeoutkmp = endkmp - startkmp;
			allkmp += timeoutkmp;
			system.gc();
			allgc += (system.currenttimemillis() - endkmp);
			
			//
			//system.out.println("all="+all.substring(0,100)+" ......");
			//system.out.println("sub="+sub);
			//
			//system.out.println("indexbf="+indexbf+";耗时: "+timeoutbf+"ms");
			//system.out.println("indexstring="+indexstring+";耗时: "+timeoutstring+"ms");
			if(indexbf == indexstring && indexkmp == indexstring){
				//system.out.println("!!!!对比相等。");
			} else {
				throw new runtimeexception("对比出错.");
			}
			
			//
			if(i % 20 == 10){
				system.out.println(i+"次bfindexof= "+allbf+"ms");
				system.out.println(i+"次stringindexof= "+allstring+"ms");
				system.out.println(i+"次kmpindexof= "+allkmp+"ms");
				system.out.println(i+"次allbuild= "+allbuild+"ms");
				system.out.println(i+"次alltoarray= "+alltoarray+"ms");
				system.out.println(i+"*3次,gc= "+allgc+"ms");
				long end = system.currenttimemillis();
				long alltime = end-start;
				long losstime = alltime - (allbf+allstring+allkmp+allgc);
				system.out.println(i+"次,所有总计耗时: "+(alltime)+"ms; 损耗:" + losstime +"ms; 损耗比: " +((0.0+losstime)/alltime * 100)+"%");
				system.out.println("--------------------------");
			}
			
		}
		//
		system.out.println(times+"次bfindexof;总计耗时: "+allbf+"ms");
		system.out.println(times+"次stringindexof;总计耗时: "+allstring+"ms");
		system.out.println(times+"次kmpindexof;总计耗时: "+allkmp+"ms");
		system.out.println(times+"次allbuild= "+allbuild+"ms");
		system.out.println(times+"次alltoarray= "+alltoarray+"ms");
		system.out.println(times+"*3次,gc;总计耗时: "+allgc+"ms");
		long end = system.currenttimemillis();
		long alltime = end-start;
		long losstime = alltime - (allbf+allstring+allkmp+allgc);
		system.out.println(times+"次,所有总计耗时: "+(alltime)+"ms; 损耗:" + losstime +"ms; 损耗比: " +((0.0+losstime)/alltime * 100)+"%");
		system.out.println("--------------------------");
		
	}
	
	//
	
	
	// 构建字符

	public static string buildstring(int len){
		return buildstring(len, null);
	}
	public static string buildstring(int len, string sub){
		//
		final int type_number = 0;
		final int type_lowercase = 1;
		final int type_uppercase = 2;
		//
		long seed = system.nanotime();
		random random = new random(seed);
		//
		//
		stringbuilder builder = new stringbuilder();
		for (int i = 0; i < len; i++) {
			//
			int type = random.nextint(3);// 0-2
			int cvalue = 0;
			if(type_number == type){
				cvalue = '0' + random.nextint(10);// 0~(n-1)
			} else if(type_lowercase == type){
				cvalue = 'a' + random.nextint(26);// 0~(n-1)
			} else if(type_uppercase == type){
				cvalue = 'a' + random.nextint(26);// 0~(n-1)
			} else {
				throw new runtimeexception(random.class.getname()+"#newxtint(int);wrong!");
			}
			//
			//cvalue = 'a' + random.nextint(26);// 0~(n-1)
			//
			char c = (char)cvalue;
			if(null != sub && sub.length() > 1){
				int index = random.nextint(sub.length());
				c = sub.charat(index);
			}
			//string s = string.valueof(c);
			builder.append(c);
			//
		}
		//
		return builder.tostring();
	}
	
	


	/**
	 * java自带的方法,内部实现了
	 * @param all
	 * @param sub
	 * @return
	 */
	public static int stringindexof(string all, string sub){
		// 防御式编程
		if(null == all || null== sub){
			return -1;
		}
		// 调用java的string方法
		return all.indexof(sub);
	}
	
	
	/**
	 * bf算法
	 * @param all
	 * @param sub
	 * @return
	 */
	public static int bfindexof(string all, string sub){
		// 防御式编程
		if(null == all || null== sub){
			return -1;
		}
		//
		int lenall = all.length();
		int lensub = sub.length();
		int i = 0;
		while (i < lenall) {
			// 从i下标开始对比
			int j = 0;
			while (j < lensub) {
				//
				char all_i = all.charat(i);
				char sub_j = sub.charat(j);
				if(all_i == sub_j){
					// 相等则继续对比下一个字符
					i++;
					j++; // 这个增长很重要
				} else {
					// 否则跳出内层循环
					break;
				}
			}
			// 如果 j 增长到和 lensub相等,则匹配成功
			if(lensub == j){
				return i - lensub;
			} else {
				i = 1+i - j; // 回溯 i
			}
		}
		//
		return -1;
	}
	

	/**
	 * kmp 算法
	 * @param all
	 * @param sub
	 * @return
	 */
	public static int kmpindexof(string all, string sub){
		//
		char[] allarray = all.tochararray(); 
		char[] subarray = sub.tochararray(); 
		//
		return kmp_index(allarray, subarray);
	}
  /** 
   * 获得字符串的next函数值 
   * 
   * @param t 
   *      字符串 
   * @return next函数值 
   */ 
  public static int[] next(char[] t) { 
    int[] next = new int[t.length]; 
    next[0] = -1; 
    int i = 0; 
    int j = -1; 
    while (i < t.length - 1) { 
      if (j == -1 || t[i] == t[j]) { 
        i++; 
        j++; 
        if (t[i] != t[j]) { 
          next[i] = j; 
        } else { 
          next[i] = next[j]; 
        } 
      } else { 
        j = next[j]; 
      } 
    } 
    return next; 
  } 
 
  /** 
   * kmp匹配字符串 
   * 
   * @param allarray 
   *      主串 
   * @param subarray 
   *      模式串 
   * @return 若匹配成功,返回下标,否则返回-1 
   */ 
	public static int kmp_index(char[] allarray, char[] subarray) { 
    int[] next = next(subarray); 
    int i = 0; 
    int j = 0; 
    while (i <= allarray.length - 1 && j <= subarray.length - 1) { 
      if (j == -1 || allarray[i] == subarray[j]) { 
        i++; 
        j++; 
      } else { 
        j = next[j]; 
      } 
    } 
    if (j < subarray.length) { 
      return -1; 
    } else 
      return i - subarray.length; // 返回模式串在主串中的头下标 
  } 
}

测试的一个结果如下所示:

--------------------------
10次bfindexof= 94ms
10次stringindexof= 56ms
10次kmpindexof= 76ms
10次allbuild= 5870ms
10次alltoarray= 137ms
10*3次,gc= 155ms
10次,所有总计耗时: 6388ms; 损耗:6007ms; 损耗比: 94.03569192235442%
--------------------------
30次bfindexof= 365ms
30次stringindexof= 222ms
30次kmpindexof= 295ms
30次allbuild= 16452ms
30次alltoarray= 395ms
30*3次,gc= 452ms
30次,所有总计耗时: 18184ms; 损耗:16850ms; 损耗比: 92.66388033435987%
--------------------------
50次bfindexof;总计耗时: 550ms
50次stringindexof;总计耗时: 335ms
50次kmpindexof;总计耗时: 450ms
50次allbuild= 26501ms
50次alltoarray= 637ms
50*3次,gc;总计耗时: 734ms
50次,所有总计耗时: 29211ms; 损耗:27142ms; 损耗比: 92.91705179555647%
--------------------------

从中可以看出来,使用stringbuilder构造字符串,800万*50次append大约消耗了26秒。换算下来每秒钟是1600万次。。。
看来是需要写一个专门计时的函数,本来junit是有自己的统计的,但是样本不太好做。

如此看来,数据的准备,也就是测试用例的选取很关键,可能需要先生成并写入到文本文件中。