Java parallel Bucket Sort
程序员文章站
2022-04-01 16:52:43
...
这是桶排序的Java 平行计算版本,随着生成的随机整数的复杂度增加,也就是生成区间的不断扩大,加速比会明显增长。数据样本太简单的话,无法体现平行计算的威力。特此声明:桶排序平行化的基本思路来自于: Professor Ian Bond in Massey University. 初始版本用CPP+MPI 编写,我本人用Java重写了基本算法,而且增加了自己的算法,数据进桶等比例缩小和出桶时的等比例放大. 目前,只开了4个线程,读者可以根据自身情况手动增加线程,但是需要修改代码。你不能用循环动态建立线程,很慢!
这是初始版本,后期需要优化,也就是继续加速,增大加速比. 平行化是否成功的关键就是看加速比:linear time / parallel time
-Xms18024M -Xmx18024M
请注意,推荐内存 32 G,你可以根据自己的内存调整随机整数生成区间,当心JVM heap out of memory.
public class MyTimer {
private static long begin;
public static void time_start(){
begin = System.nanoTime();
}
public static double time_end_milli_seconds(){
return System.nanoTime()-begin/1e6;
}
public static double time_end_seconds(){
double total_time = (System.nanoTime()-begin)/1e6;
return ((double)((int)total_time))/1000;
}
}
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
public class RandomGenerator {
public static int[] random_generate(int min,int max,int numtrials){
int[] int_array = new int[numtrials];
Random random = new Random();
for (int i = 0; i < numtrials; i++) {
int random_num = random.nextInt(max)%(max-min+1) + min;
int_array[i] = random_num;
}
return int_array;
}
}
import java.util.ArrayList;
import java.util.List;
public class BucketSort {
public int[] bucket_sort(int[] original_array,int max){
// index: 0-99 but max is : 100 . the point is the array index for bucketsort
int[] store_array = new int[max+1];
for (int i = 0; i < original_array.length; i++) {
int int_num = original_array[i];
// here is the miracle of bucketsort
store_array[int_num]= ++store_array[int_num];
}
int[] final_sorted_array = new int[original_array.length];
int cherry_pointer = 0;
for (int cherry = 0; cherry < store_array.length; cherry++) {
int cherry_num = store_array[cherry];
for (int j = 0; j < cherry_num; j++) {
final_sorted_array[cherry_pointer]=cherry;
++cherry_pointer;
}
}
return final_sorted_array;
}
}
public class SmallBucketThread extends Thread{
public int[] original_array;
public int[] final_sorted_array;
public int bucket_num;
public int bucket_size;
public int shrink_number(int item,int bucket_num,int bucket_size){
return item-(bucket_num*bucket_size);
}
public int recover_number(int item,int bucket_num,int bucket_size){
return item+(bucket_num*bucket_size);
}
public SmallBucketThread(int[] original_array,int bucket_num,int bucket_size) {
this.original_array = original_array;
final_sorted_array = new int[original_array.length];
this.bucket_num = bucket_num;
this.bucket_size = bucket_size;
}
@Override
public void run() {
bucket_sort_parallel(original_array,bucket_size);
}
public void bucket_sort_parallel(int[] original_array,int max){
// index: 0-99 but max is : 100 . the point is the array index for bucketsort
int[] store_array = new int[max+1];
for (int i = 0; i < original_array.length; i++) {
// empty item in array is : 0
if (original_array[i]!=0){
// 71 -> 21 , 89->14 , save physical memory
int int_num = shrink_number(original_array[i],bucket_num,bucket_size);
// here is the miracle of bucketsort
store_array[int_num]= ++store_array[int_num];
}
}
int cherry_pointer = 0;
// loop the cherry store....
for (int cherry = 0; cherry < store_array.length; cherry++) {
int cherry_num = store_array[cherry];
for (int j = 0; j < cherry_num; j++) {
final_sorted_array[cherry_pointer] = recover_number(cherry,bucket_num,bucket_size);
++cherry_pointer;
}
}
}
}
import java.util.ArrayList;
import java.util.List;
public class BucketSortParallel {
public double bucket_sort_parallel(int min_cherry,int max_cherry,int cherry_total_num) throws InterruptedException {
MyTimer.time_start();
int bucket_size = cherry_total_num/4;
int[] random_array = RandomGenerator.random_generate(min_cherry,max_cherry,cherry_total_num);
// 1.576 secs
// System.out.println("random generated !"+MyTimer.time_end_seconds()+" s");
// MyTimer.time_start();
int[] arr0 = new int[cherry_total_num]; int pointer0 = 0;
int[] arr1 = new int[cherry_total_num]; int pointer1 = 0;
int[] arr2 = new int[cherry_total_num]; int pointer2 = 0;
int[] arr3 = new int[cherry_total_num]; int pointer3 = 0;
// 0.433 s
// System.out.println("int[] arr generated !"+MyTimer.time_end_seconds()+" s");
// MyTimer.time_start();
// range random number in different areas.... 1-25,26-50,51-75,76-100
for (int i = 0; i < random_array.length; i++) {
int bucket_num = random_array[i] / bucket_size;
switch (bucket_num)
{
case 0: arr0[pointer0] = random_array[i]; ++pointer0;
break;
case 1: arr1[pointer1] = random_array[i]; ++pointer1;
break;
case 2: arr2[pointer2] = random_array[i]; ++pointer2;
break;
case 3: arr3[pointer3] = random_array[i]; ++pointer3;
break;
}
} // all cherries are in the right area
// System.out.println("cherry buckets are ready !"+MyTimer.time_end_seconds()+" s");
// 1.401 s
// MyTimer.time_start();
// start parallel
SmallBucketThread thread0 = new SmallBucketThread(arr0,0,bucket_size);
SmallBucketThread thread1 = new SmallBucketThread(arr1,1,bucket_size);
SmallBucketThread thread2 = new SmallBucketThread(arr2,2,bucket_size);
SmallBucketThread thread3 = new SmallBucketThread(arr3,3,bucket_size);
thread0.start();
thread1.start();
thread2.start();
thread3.start();
thread0.join();
thread1.join();
thread2.join();
thread3.join();
List list = new ArrayList<int[]>();
list.add(thread0.final_sorted_array);
list.add(thread1.final_sorted_array);
list.add(thread2.final_sorted_array);
list.add(thread3.final_sorted_array);
// System.out.println("parallel is done !"+MyTimer.time_end_seconds()+" s");
// 1.586 s
double time_end = MyTimer.time_end_seconds();
// this.print_array(list);
return time_end;
}
public void print_array(List list)
{
for (int i = 0; i < list.size(); i++) {
int[] array = (int[]) list.get(i);
for (int j = 0; j < array.length; j++) {
if (array[j]!=0)
System.out.println(array[j]);
}
}
}
public void print_array(int[] array){
for (int j = 0; j < array.length; j++) {
System.out.print(array[j]+"-");
}
}
}
public class TestBucketSort {
public double linear_bucketsort(int min,int max,int num_trails){
MyTimer.time_start();
BucketSort bucketSort = new BucketSort();
RandomGenerator randomGenerator = new RandomGenerator();
int[] random_array = randomGenerator.random_generate(min,max,num_trails);
int[] sorted_array = bucketSort.bucket_sort(random_array,max);
double time_end = MyTimer.time_end_seconds();
/* for (int i = 0; i < sorted_array.length; i++) {
System.out.println(sorted_array[i]);
}*/
// System.out.println("Time elapse : "+time_end+"s sorted_array.length: "+sorted_array.length);
return time_end;
}
public static void main(String[] args) throws InterruptedException {
int min = 1;
int max = 1999999999; // 1.9 billion
int num_trails = 100000000; // 100 million
TestBucketSort testBucketSort = new TestBucketSort();
double linear_time = testBucketSort.linear_bucketsort(min,max,num_trails);
BucketSortParallel parallel = new BucketSortParallel();
double parallel_time = parallel.bucket_sort_parallel(min,max,num_trails);
System.out.println("linear_time: "+linear_time+" secs");
System.out.println("parallel_time: "+parallel_time+" secs");
System.out.println("speed up : "+ (linear_time/parallel_time));
}
}
推荐阅读
-
Java Collections.sort()实现List排序的默认方法和自定义方法
-
java插入排序 Insert sort实例
-
java插入排序 Insert sort实例
-
Java垃圾收集器——Parallel、G1收集器日志分析及性能调优示范
-
JAVA基于Arrays.sort()实现数组升序和降序
-
Java.lang.Arrays中的toString()和sort()基本使用形式
-
5.4 集合的排序(Java学习笔记)(Collections.sort(),及Arrays.sort()底层分析)
-
Java排序的那些事之sort方法的使用详解
-
java冒泡排序Bubble Sort算法代码
-
Java对学生成绩排序——通过list.sort()对list进行排序