欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

OpenCL实例

程序员文章站 2022-07-12 21:31:20
...
OpenCL培训实例,下面是包含main函数的主程序文件。
#include <math.h>

#include "CL\opencl.h"
#include "utility.h"

// Number of float elements in each host vector and device buffer; also used
// as the global work size when the kernel is enqueued.
static const size_t vectorSize = 4096; //must be evenly divisible by workSize
// Local work size passed to clEnqueueNDRangeKernel.
static const size_t workSize = 256;

//#define EXERCISE1

/*
 * Host program for the OpenCL training exercises.
 *
 * Picks the first platform and the first device, creates a context, a command
 * queue and three device buffers, uploads two random input vectors, and --
 * unless EXERCISE1 is defined -- loads the "SimpleKernel" kernel from
 * SimpleKernel.aocx, runs it, reads the result back and compares it with
 * sin(X + Y) computed on the host.
 *
 * Every OpenCL call is checked with assert(), matching the rest of the file.
 *
 * Returns 0 on success and 1 when verification reports a mismatch.
 */
int main(void)
{
	cl_int err;
	int exitCode = 0;
	const size_t bufferBytes = sizeof(cl_float) * vectorSize;

	//Setup Platform
	cl_uint myPlatformCount;

	////////////// Exercise 1 Step 2.2
	err = clGetPlatformIDs(0, NULL, &myPlatformCount);
	assert(err==CL_SUCCESS);
	assert(myPlatformCount > 0);

	//Get Platform ID (only the first platform is used)
	cl_platform_id myPlatform;
	////////////// Exercise 1 Step 2.3
	err = clGetPlatformIDs(1, &myPlatform, NULL);
	assert(err==CL_SUCCESS);
	print_platform_info(&myPlatform);

	//Setup Device
	cl_uint myDeviceCount;
	////////////// Exercise 1 Step 2.4
	err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &myDeviceCount);
	assert(err==CL_SUCCESS);
	assert(myDeviceCount > 0);

	//Get Device ID (only the first device is used)
	cl_device_id myDevice;
	////////////// Exercise 1 Step 2.5
	err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 1, &myDevice, NULL);
	assert(err==CL_SUCCESS);
	print_device_info(&myDevice);

	//Create Context
	////////////// Exercise 1 Step 2.6
	cl_context context = clCreateContext(0, 1, &myDevice, NULL, NULL, &err);
	assert(err==CL_SUCCESS);

	//Create Command queue
	////////////// Exercise 1 Step 2.7
	cl_command_queue queue = clCreateCommandQueue(context, myDevice, 0, &err);
	assert(err==CL_SUCCESS);

	//Create Buffers for input and output
	////////////// Exercise 1 Step 2.8
	cl_mem kernelIn = clCreateBuffer(context, CL_MEM_READ_ONLY, bufferBytes, NULL, &err);
	assert(err==CL_SUCCESS);
	cl_mem kernelIn2 = clCreateBuffer(context, CL_MEM_READ_ONLY, bufferBytes, NULL, &err);
	assert(err==CL_SUCCESS);
	cl_mem kernelOut = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferBytes, NULL, &err);
	assert(err==CL_SUCCESS);

	//Inputs and Outputs to Kernel, X and Y are inputs, Z is output
	void *X, *Y, *Z;

	//Allocates host memory; X and Y are filled with values from 0 to 1000
	float LO= 0;   float HI=1000;
	allocate_generate(&X, &Y, &Z, LO, HI, vectorSize);
	assert(X != NULL && Y != NULL && Z != NULL);

	//Write data to device
	////////////// Exercise 1 Step 2.9
	// Each enqueue is checked on its own so that a failure of the first write
	// is not hidden by the result of the second one.
	err = clEnqueueWriteBuffer(queue, kernelIn, CL_FALSE, 0, bufferBytes, X, 0, NULL, NULL);
	assert(err==CL_SUCCESS);
	err = clEnqueueWriteBuffer(queue, kernelIn2, CL_FALSE, 0, bufferBytes, Y, 0, NULL, NULL);
	assert(err==CL_SUCCESS);
	// The writes are non-blocking, so wait for them to finish.
	err = clFinish(queue);
	assert(err==CL_SUCCESS);

#ifndef EXERCISE1
	// create the kernel
	const char *kernel_name = "SimpleKernel";

	size_t lengths;
	unsigned char* binaries = get_binary("SimpleKernel.aocx", &lengths);
	assert(binaries != NULL);
	cl_int kernel_status;

	// Create the Program from the AOCX file.
	////////////////////// Exercise 2 Step 2.3    ///////////////////
	cl_program program = clCreateProgramWithBinary(context, 1, &myDevice, &lengths, (const unsigned char**)&binaries, &kernel_status, &err);
	assert(err==CL_SUCCESS);
	assert(kernel_status==CL_SUCCESS);

	// build the program
	//////////////      Compile the Kernel.... For Altera, nothing is done here, but this conforms to the standard
	//////////////       Exercise 2   Step 2.4    ///////////////////
	err = clBuildProgram(program, 1, &myDevice, "", NULL, NULL);
	assert(err==CL_SUCCESS);

	// create the kernel
	//////////////       Find Kernel in Program
	//////////////       Exercise 2   Step 2.5    ///////////////////
	cl_kernel kernel = clCreateKernel(program, kernel_name, &err);
	assert(err==CL_SUCCESS);

	//////////////     Set Arguments to the Kernels
	//////////////       Exercise 2   Step 2.6    ///////////////////
	err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&kernelIn);
	assert(err==CL_SUCCESS);
	err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&kernelIn2);
	assert(err==CL_SUCCESS);
	err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&kernelOut);
	assert(err==CL_SUCCESS);

	printf("\nLaunching the kernel...\n");

	// launch kernel
	//////////////       Exercise 2   Step 2.7    ///////////////////
	err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &vectorSize, &workSize, 0, NULL, NULL);
	assert(err==CL_SUCCESS);
	err = clFinish(queue);
	assert(err==CL_SUCCESS);

	// read the output (blocking read)
	//////////////       Exercise 2   Step 2.8    ///////////////////
	err = clEnqueueReadBuffer(queue, kernelOut, CL_TRUE, 0, bufferBytes, Z, 0, NULL, NULL);
	assert(err==CL_SUCCESS);

	void * CalcZ = malloc(sizeof(float)*vectorSize);
	assert(CalcZ != NULL);

	for (size_t i=0; i<vectorSize; i++)
	{
		//////////////  Equivalent Code running on CPUs
		//////////////       Exercise 2   Step 2.9    ///////////////////
		((float*) CalcZ)[i]=sin(((float*) X)[i] + ((float*) Y)[i]);
	}

	//Print Performance Results
	// A mismatch is reported through the process exit status.
	if (!verification (X, Y, Z, CalcZ, vectorSize))
		exitCode = 1;

	// Clean up the objects that only exist when the kernel was run
	if(kernel)
		clReleaseKernel(kernel);
	if(program)
		clReleaseProgram(program);
	free (binaries);
	free (CalcZ);

#endif

	// Clean up everything created before the EXERCISE1 switch, so that both
	// build variants release their resources.
	if(kernelIn)
		clReleaseMemObject(kernelIn);
	if(kernelIn2)
		clReleaseMemObject(kernelIn2);
	if(kernelOut)
		clReleaseMemObject(kernelOut);
	if(queue)
		clReleaseCommandQueue(queue);
	if(context)
		clReleaseContext(context);
	free (X);
	free (Y);
	free (Z);

	return exitCode;
}


下面是实现各个子函数的utility.c文件。
 

// This file
#include "utility.h"
#include <math.h>


// 
/*
 * Reads the whole file `name` into a freshly allocated buffer.
 *
 * name   - path of the file to read; it is opened in binary mode.
 * length - receives the number of bytes read, or 0 on failure.
 *
 * Returns the buffer, which the caller must release with free(). A file that
 * cannot be opened trips the assert, as before. When asserts are compiled
 * out, or when seeking, sizing, allocating or reading fails, NULL is returned
 * and *length is 0.
 */
unsigned char* get_binary(const char * name, size_t* length)
{
	*length = 0;

	FILE *fp = fopen(name, "rb");
	assert (fp != NULL);
	if (fp == NULL) {
		return NULL;
	}

	unsigned char *binaries = NULL;
	long size = -1;

	// ftell reports -1 on error; that must never be turned into a size_t.
	if (fseek (fp, 0, SEEK_END) == 0) {
		size = ftell (fp);
	}

	if (size >= 0 && fseek (fp, 0, SEEK_SET) == 0) {
		// Ask for at least one byte so that an empty file still yields a
		// non-NULL buffer.
		binaries = (unsigned char*)malloc(size > 0 ? (size_t)size : 1);
		if (binaries != NULL && fread (binaries, 1, (size_t)size, fp) != (size_t)size) {
			// Short read: do not hand back a partially filled buffer.
			free (binaries);
			binaries = NULL;
		}
	}

	fclose (fp);

	if (binaries != NULL) {
		*length = (size_t)size;
	}
	return binaries;
}


/*
 * Prints the name, profile, version and vendor of an OpenCL platform to
 * standard output.
 *
 * myPlatform - pointer to the platform id to describe.
 *
 * The queries are best effort: a failed query does not stop the program. The
 * affected field is printed as an empty string and a warning line with the
 * number of failed queries is appended.
 */
void print_platform_info(cl_platform_id* myPlatform)
{
	cl_int err;
	int failedQueries = 0;

	//Grab Platform Info
	// The buffers start as empty strings so that a failed query never makes
	// printf read indeterminate bytes.
	char myPlatformName[128] = "";
	char myPlatformProfile[128] = "";
	char myPlatformVersion[128] = "";
	char myPlatformVendor[128] = "";

	// OpenCL reports success as 0 (CL_SUCCESS); anything else is an error.
	err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_NAME, sizeof myPlatformName, myPlatformName, NULL);
	failedQueries += (err != 0);
	err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_PROFILE, sizeof myPlatformProfile, myPlatformProfile, NULL);
	failedQueries += (err != 0);
	err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_VERSION, sizeof myPlatformVersion, myPlatformVersion, NULL);
	failedQueries += (err != 0);
	err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_VENDOR, sizeof myPlatformVendor, myPlatformVendor, NULL);
	failedQueries += (err != 0);

	printf ("\nPlatform Name: %s\n", myPlatformName);
	printf ("Platform Profile: %s\n", myPlatformProfile);
	printf ("Platform Version: %s\n", myPlatformVersion);
	printf ("Platform Vendor: %s\n", myPlatformVendor);

	if (failedQueries > 0) {
		printf ("Warning: %d platform query(ies) failed; the affected fields are empty\n", failedQueries);
	}
}


/*
 * Prints the name, vendor, version, availability, compute-unit count and
 * maximum work-item dimensions of an OpenCL device to standard output.
 *
 * myDevice - pointer to the device id to describe.
 *
 * The queries are best effort: a failed query does not stop the program. The
 * affected field is printed as an empty string or 0 and a warning line with
 * the number of failed queries is appended.
 */
void print_device_info(cl_device_id* myDevice)
{
	cl_int err;
	int failedQueries = 0;

	//Get Device Properties
	// Everything starts zeroed so that a failed query never makes printf
	// read indeterminate values.
	char myDeviceVendor[128] = "";
	cl_uint myDeviceMaxCU = 0;
	cl_uint myDeviceMaxWID = 0;
	char myDeviceName[128] = "";
	char myDeviceVersion[128] = "";
	cl_bool myDeviceAvailable = 0;

	// OpenCL reports success as 0 (CL_SUCCESS); anything else is an error.
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_NAME, sizeof myDeviceName, myDeviceName, NULL);
	failedQueries += (err != 0);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_VENDOR, sizeof myDeviceVendor, myDeviceVendor, NULL);
	failedQueries += (err != 0);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &myDeviceMaxCU, NULL);
	failedQueries += (err != 0);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &myDeviceMaxWID, NULL);
	failedQueries += (err != 0);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_VERSION, sizeof myDeviceVersion, myDeviceVersion, NULL);
	failedQueries += (err != 0);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_AVAILABLE, sizeof(cl_bool), &myDeviceAvailable, NULL);
	failedQueries += (err != 0);

	printf ("\nDevice Name: %s\n", myDeviceName);
	printf ("Device Vendor: %s\n", myDeviceVendor);
	printf ("Device Version: %s\n", myDeviceVersion);
	// cl_bool and cl_uint are unsigned, so they are printed with %u.
	printf ("Device Available: %u\n", myDeviceAvailable);
	printf ("Device Max Compute Units: %u\n", myDeviceMaxCU);
	printf ("Device Max Work Item Dimensions: %u\n", myDeviceMaxWID);

	if (failedQueries > 0) {
		printf ("Warning: %d device query(ies) failed; the affected fields are empty or 0\n", failedQueries);
	}
}

/*
 * Allocates three float vectors of vectorSize elements and fills the two
 * inputs with pseudo-random values.
 *
 * X, Y       - receive the input vectors; every element is LO plus a rand()
 *              based fraction of (HI - LO).
 * Z          - receives the output vector; its contents are left unset.
 * LO, HI     - bounds of the generated values.
 * vectorSize - number of float elements per vector.
 *
 * The caller owns the three vectors and releases each one with free().
 * If the byte size would overflow or any allocation fails, nothing is kept
 * and *X, *Y and *Z are all set to NULL.
 */
void allocate_generate(void** X, void** Y, void** Z, float LO, float HI, size_t vectorSize)
{
	*X = NULL;
	*Y = NULL;
	*Z = NULL;

	// Reject element counts whose byte size does not fit into size_t.
	if (vectorSize > (size_t)-1 / sizeof(float)) {
		return;
	}

	// allocate the input and output vectors
	const size_t bytes = sizeof(float) * vectorSize;
	float *x = (float *)malloc(bytes);
	float *y = (float *)malloc(bytes);
	float *z = (float *)malloc(bytes);
	if (x == NULL || y == NULL || z == NULL) {
		free(x);
		free(y);
		free(z);
		return;
	}

	// Assign a random number from LO to HI to every location of X and Y.
	// X[i] is drawn before Y[i], so the rand() sequence is consumed in the
	// same order as before.
	const float scale = (float)RAND_MAX/(HI-LO);
	for (size_t i = 0; i < vectorSize; ++i) {
		x[i] = LO + (float)rand()/scale;
		y[i] = LO + (float)rand()/scale;
	}

	*X = x;
	*Y = y;
	*Z = z;
}

/*
 * Compares the OpenCL result Z with the host-computed reference CalcZ and
 * prints the outcome.
 *
 * X, Y       - input vectors, used only for the printed messages.
 * Z          - result read back from the device.
 * CalcZ      - reference result computed on the host.
 * vectorSize - number of float elements in each vector.
 *
 * All four pointers are read as arrays of float.
 *
 * Returns false at the first element whose absolute difference exceeds
 * EPSILON or is not a number. Otherwise prints a few sample results and
 * returns true.
 */
bool verification (void * X, void * Y, void * Z, void * CalcZ, size_t vectorSize)
{
	const float *in1 = (const float*)X;
	const float *in2 = (const float*)Y;
	const float *actual = (const float*)Z;
	const float *expected = (const float*)CalcZ;

	//Verify if OpenCL Calculation is Same as C Result
	// Every element is checked, including the last four, and the bound can
	// no longer wrap around for vectors shorter than four elements.
	for (size_t i = 0; i < vectorSize; i++) {
		// Written as !(diff <= EPSILON) so that a NaN difference is
		// reported as a failure too.
		if (!(fabs(expected[i] - actual[i]) <= EPSILON)) {
			 printf("\nVERIFICATION FAILED! index %d, X:%f, Y:%f, OpenCL Result:%f != Result %f)",
				  (int)i, in1[i], in2[i], actual[i], expected[i]);
			 return false;
		}
	}

	// Print Sample Data to Standard Out
	printf("\n\nVERIFICATION PASSED!!!\n\nSome Sample of Results\n");
	printf("------------------------------------\n");

	// Step through the vector in fifths; fall back to a step of 1 so that
	// vectors with fewer than five elements cannot loop forever.
	size_t step = vectorSize / 5;
	if (step == 0) {
		step = 1;
	}
	for (size_t i = 0; i < vectorSize; i += step) {
		printf("Index %d: Input 1 is %f, Input 2 is %f, Result is %f\n", (int)i, in1[i], in2[i], actual[i]);
	}
	return true;
}

需要原工程可以留下邮箱。


相关标签: opencl