Java parallel Bucket Sort v 3.0
首先我对Java的性能表示失望,也可能是我的水平有限。除了求最值没有优化,其他做了一点优化,主要是内存的使用,但是目前跑起来还是需要 18 gigs 内存左右,因为需要对 三亿个随机整数进行排序,开了4个线程,动态开辟线程会更慢.
linear_time: 14.669 secs
parallel_time: 9.279 secs
speed up : 1.5808815605129865
最好情况可以加速一倍,不理想!也许12个线程全开会好一些,但是准备暂时停止对Java的平行计算研究,回到cpp。个人感觉java不是为了高性能计算而生的,更适合做互联网应用. 个人建议不要使用线程池,会更慢!因为我们都是一锤子买卖,只是需要得到实验结果,没有百万用户每日每夜的访问。
4亿个整数:
linear_time: 21.917 secs
parallel_time: 10.672 secs
speed up : 2.053691904047976
当然了,可能是因为 cpu 线性计算数字运算非常快,显得平行计算有点鸡肋. 但是,当数据规模达到100亿,平行计算的性能就会很明显,不过这种情况并不多见。建议研究平行计算 使用 c or cpp ,而不是java。建议各位读者研究数据结构的话,样本一个亿起步,否则样本太小意义不大,比如冒泡排序,我觉得没啥实际意义。
注意一点关于桶排序,整数数据范围,这个非常重要,待排序整数必须有一个范围,而且必须得是整数,小数好像不能使用桶排序。因为,桶排序依靠的就是数组索引值映射待排序数字!你最后得到的数据,根本不是原来你输入 的数据,而是他们在数组上的索引映射,也就是说,你最后得到的是数组索引值!有点类似量子力学那种。桶排序对内存消耗极大,典型的用空间换时间!比如:你的最大待排整数最大为:100000000,那么你必须有个这么长的数组!数值就是索引值,索引值就是数值!它之所以快,因为,我觉得是:对整数数组的访问超快!你给一个索引,闪电般找到这个索引对应的值!
int [] my_array= new int[100000000];
my_array[100000000-1]= super fast…
public class MaxMinTool {
public static int max_item(int original,int fresh_one){
return fresh_one>original?fresh_one:original;
}
public static int max_item(int one,int two,int three,int four){
return max_item(max_item(one,two),max_item(three,four));
}
}
import java.util.List;
public class PrintTool {
public static void print_array(List list)
{
int counter = 0;
for (int i = 0; i < list.size(); i++) {
int[] array = (int[]) list.get(i);
for (int j = 0; j < array.length; j++) {
System.out.println(array[j]);
++counter;
}
}
System.out.println("List counter:"+counter);
}
public static void print_array(int[] array){
for (int j = 0; j < array.length; j++) {
System.out.print(array[j]+"-");
}
}
}
import java.util.Random;
public class RandomGenerator {
public static int[] random_generate(int min,int max,int numtrials){
int[] int_array = new int[numtrials];
Random random = new Random();
for (int i = 0; i < numtrials; i++) {
int random_num = random.nextInt(max)%(max-min+1) + min;
int_array[i] = random_num;
}
return int_array;
}
}
public class BucketSort {
public int max;
public int[] linear_bucket_sort(int[] original_array){
for (int i = 0; i < original_array.length; i++) {
max = MaxMinTool.max_item(max,original_array[i]);
}
return bucket_sort(original_array,max);
}
public int[] bucket_sort(int[] original_array,int max){
// index: 0-99 but max is : 100 . the point is the array index for bucketsort
int[] store_array = new int[max+1];
for (int i = 0; i < original_array.length; i++) {
int int_num = original_array[i];
// here is the miracle of bucketsort
store_array[int_num]= ++store_array[int_num];
}
int[] final_sorted_array = new int[original_array.length];
int cherry_pointer = 0;
for (int cherry = 0; cherry < store_array.length; cherry++) {
int cherry_num = store_array[cherry];
for (int j = 0; j < cherry_num; j++) {
final_sorted_array[cherry_pointer]=cherry;
++cherry_pointer;
}
}
return final_sorted_array;
}
}
public class GoToBucketThread extends Thread{
public int begin_index;
public int[] random_array;
public int bucket_size;
public int cpu_num;
public int[] arr0;
public int pointer0 = 0;
public int[] arr1;
public int pointer1 = 0;
public int[] arr2;
public int pointer2 = 0;
public int[] arr3;
public int pointer3 = 0;
public int max0;
public int max1;
public int max2;
public int max3;
public GoToBucketThread(int begin_index, int[] random_array, int bucket_size,int cpu_num) {
this.begin_index = begin_index;
this.random_array = random_array;
this.bucket_size = bucket_size;
this.cpu_num = cpu_num;
arr0 = new int[bucket_size];
arr1 = new int[bucket_size];
arr2 = new int[bucket_size];
arr3 = new int[bucket_size];
}
@Override
public void run() {
classify_data();
arr0 = wash_zero_data(arr0,pointer0);
arr1 = wash_zero_data(arr1,pointer1);
arr2 = wash_zero_data(arr2,pointer2);
arr3 = wash_zero_data(arr3,pointer3);
}
public int[] wash_zero_data(int[] original_array,int array_pointer){
int[] new_array = new int[array_pointer];
for (int i = 0; i < array_pointer; i++) {
new_array[i] = original_array[i];
}
return new_array;
}
public void classify_data(){
// range random number in different areas.... 1-25,26-50,51-75,76-100
for (int i = begin_index; i < random_array.length; i+=cpu_num) {
int bucket_num = random_array[i]/(bucket_size);
bucket_num = bucket_num>(cpu_num-1)?(cpu_num-1):bucket_num;
switch (bucket_num)
{
case 0: arr0[pointer0] = random_array[i];
++pointer0;
break;
case 1: arr1[pointer1] = random_array[i];
++pointer1;
break;
case 2: arr2[pointer2] = random_array[i];
++pointer2;
break;
case 3: arr3[pointer3] = random_array[i];
++pointer3;
break;
}
} // all cherries are in the right area
}
}
public class SmallBucketThread extends Thread{
public int[] original_array;
public int[] final_sorted_array;
public int max_num;
public int min_num;
public SmallBucketThread(int[] original_array,int min_num,int max_num) {
this.original_array = original_array;
final_sorted_array = new int[original_array.length];
this.max_num = max_num;
this.min_num = min_num;
}
@Override
public void run() {
bucket_sort_parallel(original_array,max_num);
}
public void bucket_sort_parallel(int[] original_array,int max){
// index: 0-99 but max is : 100 . the point is the array index for bucketsort
int[] store_array = new int[max+1];
for (int i = 0; i < original_array.length; i++) {
int int_num = original_array[i];
// here is the miracle of bucketsort,index is the value,and the value is the index as well
store_array[int_num]= ++store_array[int_num];
}
int cherry_pointer = 0;
// loop the cherry store....
for (int cherry = min_num; cherry < max_num; cherry++) {
int cherry_num = store_array[cherry];
// just pick up cherry at least one
if (cherry_num!=0){
for (int j = 0; j < cherry_num; j++) {
final_sorted_array[cherry_pointer] = cherry;
// pointer is at the end
if (cherry_pointer==original_array.length-1)
break;
++cherry_pointer;
}
}
}
}
}
import java.util.Random;
public class RandomGenerator {
public static int[] random_generate(int min,int max,int numtrials){
int[] int_array = new int[numtrials];
Random random = new Random();
for (int i = 0; i < numtrials; i++) {
int random_num = random.nextInt(max)%(max-min+1) + min;
int_array[i] = random_num;
}
return int_array;
}
}
public class MyTimer {
private static long begin;
public static void time_start(){
begin = System.nanoTime();
}
public static double time_end_milli_seconds(){
return System.nanoTime()-begin/1e6;
}
public static double time_end_seconds(){
double total_time = (System.nanoTime()-begin)/1e6;
return ((double)((int)total_time))/1000;
}
}
import java.util.List;
public class TestBucketSort {
public void print_array(List list)
{
for (int i = 0; i < list.size(); i++) {
int[] array = (int[]) list.get(i);
for (int j = 0; j < array.length; j++) {
System.out.println(array[j]);
}
}
}
public void print_array(int[] array){
for (int j = 0; j < array.length; j++) {
System.out.print(array[j]+"-");
}
}
public double linear_bucketsort(int min,int max,int num_trails,int[] random_array){
MyTimer.time_start();
BucketSort bucketSort = new BucketSort();
int[] sorted_array = bucketSort.linear_bucket_sort(random_array);
double time_end = MyTimer.time_end_seconds();
return time_end;
}
public static void main(String[] args) throws InterruptedException {
int test_num = 300000000;
int min = 1;
int max = test_num; // 2 billion
int num_trails = test_num; // 100 million
int[] random_array = RandomGenerator.random_generate(min,max,num_trails);
// PrintTool.print_array(random_array);
TestBucketSort testBucketSort = new TestBucketSort();
double linear_time = testBucketSort.linear_bucketsort(min,max,num_trails,random_array);
BucketSortParallel parallel = new BucketSortParallel();
double parallel_time = parallel.bucket_sort_parallel(min,max,num_trails,random_array);
// System.out.println("parallel result: ");
// PrintTool.print_array(parallel.sorted_list);
System.out.println("linear_time: "+linear_time+" secs");
System.out.println("parallel_time: "+parallel_time+" secs");
System.out.println("speed up : "+ (linear_time/parallel_time));
}
}