Java 位图法排序的使用方法
java jdk里面容器类的排序算法使用的主要是插入排序和归并排序,可能不同版本的实现有所不同,关键代码如下:
/**
* performs a sort on the section of the array between the given indices
* using a mergesort with exponential search algorithm (in which the merge
* is performed by exponential search). n*log(n) performance is guaranteed
* and in the average case it will be faster then any mergesort in which the
* merge is performed by linear search.
*
* @param in -
* the array for sorting.
* @param out -
* the result, sorted array.
* @param start
* the start index
* @param end
* the end index + 1
*/
@suppresswarnings("unchecked")
private static void mergesort(object[] in, object[] out, int start,
int end) {
int len = end - start;
// use insertion sort for small arrays
if (len <= simple_length) {
for (int i = start + 1; i < end; i++) {
comparable<object> current = (comparable<object>) out[i];
object prev = out[i - 1];
if (current.compareto(prev) < 0) {
int j = i;
do {
out[j--] = prev;
} while (j > start
&& current.compareto(prev = out[j - 1]) < 0);
out[j] = current;
}
}
return;
}
int med = (end + start) >>> 1;
mergesort(out, in, start, med);
mergesort(out, in, med, end);
// merging
// if arrays are already sorted - no merge
if (((comparable<object>) in[med - 1]).compareto(in[med]) <= 0) {
system.arraycopy(in, start, out, start, len);
return;
}
int r = med, i = start;
// use merging with exponential search
do {
comparable<object> fromval = (comparable<object>) in[start];
comparable<object> rval = (comparable<object>) in[r];
if (fromval.compareto(rval) <= 0) {
int l_1 = find(in, rval, -1, start + 1, med - 1);
int tocopy = l_1 - start + 1;
system.arraycopy(in, start, out, i, tocopy);
i += tocopy;
out[i++] = rval;
r++;
start = l_1 + 1;
} else {
int r_1 = find(in, fromval, 0, r + 1, end - 1);
int tocopy = r_1 - r + 1;
system.arraycopy(in, r, out, i, tocopy);
i += tocopy;
out[i++] = fromval;
start++;
r = r_1 + 1;
}
} while ((end - r) > 0 && (med - start) > 0);
// copy rest of array
if ((end - r) <= 0) {
system.arraycopy(in, start, out, i, med - start);
} else {
system.arraycopy(in, r, out, i, end - r);
}
}
看到编程珠玑上有一个很有趣的排序算法-位图法其思想是用1位来表示[0~n-1]中的整数是否存在。1表示存在,0表示不存在。即将正整数映射到bit集合中,每一个bit代表其映射的正整数是否存在。
比如{1,2,3,5,8,13}使用下列集合表示:
0 1 1 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0
伪代码如下:
for (i in [0~n-1]) bit[i] = 0;
for(i in [0~n-1])
if (i in input file)
bit[i] = 1
for(i in [0~n-1])
if(bit[i] == 1)
output i
用java 代码尝试下,效率果然不错:
public class javauniquesort {
public static int[] temp = new int[1000001];
public static list<integer> templist = new arraylist<integer>();
public static int count;
public static void main(final string[] args) {
list<integer> firstnum = new arraylist<integer>();
list<integer> secondnum = new arraylist<integer>();
for (int i = 1; i <= 1000000; i++) {
firstnum.add(i);
secondnum.add(i);
}
collections.shuffle(firstnum);
collections.shuffle(secondnum);
getstarttime();
collections.sort(firstnum);
getendtime("java sort run time ");
getstarttime();
secondnum = uniquesort(secondnum);
getendtime("uniquesort run time ");
}
public static list<integer> uniquesort(final list<integer> uniquelist) {
javauniquesort.templist.clear();
for (int i = 0; i < javauniquesort.temp.length; i++) {
javauniquesort.temp[i] = 0;
}
for (int i = 0; i < uniquelist.size(); i++) {
javauniquesort.temp[uniquelist.get(i)] = 1;
}
for (int i = 0; i < javauniquesort.temp.length; i++) {
if (javauniquesort.temp[i] == 1) {
javauniquesort.templist.add(i);
}
}
return javauniquesort.templist;
}
public static void getstarttime() {
javashuffle.start = system.nanotime();
}
public static void getendtime(final string s) {
javashuffle.end = system.nanotime();
system.out.println(s + ": " + (javashuffle.end - javashuffle.start) + "ns");
}
}
运行时间:
java sort run time : 1257737334ns
uniquesort run time : 170228290ns
java sort run time : 1202749828ns
uniquesort run time : 169327770ns
如果有重复数据,可以修改下:
public class javaduplicatesort {
public static list<integer> templist = new arraylist<integer>();
public static int count;
public static void main(final string[] args) {
random random = new random();
list<integer> firstnum = new arraylist<integer>();
list<integer> secondnum = new arraylist<integer>();
for (int i = 1; i <= 100000; i++) {
firstnum.add(i);
secondnum.add(i);
firstnum.add(random.nextint(i + 1));
secondnum.add(random.nextint(i + 1));
}
collections.shuffle(firstnum);
collections.shuffle(secondnum);
getstarttime();
collections.sort(firstnum);
getendtime("java sort run time ");
getstarttime();
secondnum = uniquesort(secondnum);
getendtime("uniquesort run time ");
}
public static list<integer> uniquesort(final list<integer> uniquelist) {
javaduplicatesort.templist.clear();
int[] temp = new int[200002];
for (int i = 0; i < temp.length; i++) {
temp[i] = 0;
}
for (int i = 0; i < uniquelist.size(); i++) {
temp[uniquelist.get(i)]++;
}
for (int i = 0; i < temp.length; i++) {
for (int j = temp[i]; j > 0; j--) {
javaduplicatesort.templist.add(i);
}
}
return javaduplicatesort.templist;
}
public static void getstarttime() {
javashuffle.start = system.nanotime();
}
public static void getendtime(final string s) {
javashuffle.end = system.nanotime();
system.out.println(s + ": " + (javashuffle.end - javashuffle.start) + "ns");
}
}
这种算法还是有很明显的局限性的,比如说要知道数据中最大的数值,更重要的是数据的疏密程度,比如说最大值为1000000而要数组大小只有100,那么效率会下降的非常明显。。。。。但是,使用位图法进行排序,确实让人眼前一亮。位图法通常是用来存储数据,判断某个数据存不存在或者判断数组是否存在重复 。