OpenCL实例

程序员文章站 2022-07-12 21:31:20

...

这是一个 OpenCL 培训实例，下面是主程序（main 函数）的代码。

#include <math.h>

#include "CL\opencl.h"
#include "utility.h"

// Number of float elements in each vector; it is also passed as the global
// work size of the kernel launch, so it must be evenly divisible by workSize.
static const size_t vectorSize = 4096;
// Local work-group size passed to clEnqueueNDRangeKernel.
static const size_t workSize = 256;

//#define EXERCISE1

/*
 * Host program for the OpenCL training exercise.
 *
 * Selects the first platform and the first device, creates a context, a
 * command queue and three float buffers of vectorSize elements, and uploads
 * two randomly generated input vectors.  Unless EXERCISE1 is defined it then
 * loads the precompiled "SimpleKernel.aocx" binary, launches the kernel
 * "SimpleKernel", reads the result back and compares it against
 * sin(X[i] + Y[i]) computed on the host.
 *
 * Returns 0 on success and 1 when verification() reports a mismatch.
 * OpenCL and allocation failures are reported through assert().
 */
int main(void)
{
	cl_int err;
	int exitCode = 0;

	//Setup Platform
	cl_uint myPlatformCount = 0;

	////////////// Exercise 1 Step 2.2
	err = clGetPlatformIDs(0, NULL, &myPlatformCount);
	assert(err==CL_SUCCESS);
	assert(myPlatformCount > 0);

	//Get Platform ID
	cl_platform_id myPlatform;
	////////////// Exercise 1 Step 2.3
	err = clGetPlatformIDs(1, &myPlatform, NULL);
	assert(err==CL_SUCCESS);
	print_platform_info(&myPlatform);

	//Setup Device
	cl_uint myDeviceCount = 0;
	////////////// Exercise 1 Step 2.4
	err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &myDeviceCount);
	assert(err==CL_SUCCESS);
	assert(myDeviceCount > 0);

	//Get Device ID
	cl_device_id myDevice;
	////////////// Exercise 1 Step 2.5
	err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 1, &myDevice, NULL);
	assert(err==CL_SUCCESS);
	print_device_info(&myDevice);

	//Create Context
	////////////// Exercise 1 Step 2.6
	cl_context context = clCreateContext(0, 1, &myDevice, NULL, NULL, &err);
	assert(err==CL_SUCCESS);

	//Create Command queue
	////////////// Exercise 1 Step 2.7
	cl_command_queue queue = clCreateCommandQueue(context, myDevice, 0, &err);
	assert(err==CL_SUCCESS);

	//Create Buffers for input and output
	////////////// Exercise 1 Step 2.8
	const size_t vectorBytes = sizeof(cl_float) * vectorSize;
	cl_mem kernelIn = clCreateBuffer(context, CL_MEM_READ_ONLY, vectorBytes, NULL, &err);
	assert(err==CL_SUCCESS);
	cl_mem kernelIn2 = clCreateBuffer(context, CL_MEM_READ_ONLY, vectorBytes, NULL, &err);
	assert(err==CL_SUCCESS);
	cl_mem kernelOut = clCreateBuffer(context, CL_MEM_WRITE_ONLY, vectorBytes, NULL, &err);
	assert(err==CL_SUCCESS);

	//Inputs and Outputs to Kernel, X and Y are inputs, Z is output
	void *X = NULL, *Y = NULL, *Z = NULL;

	//Allocates memory with value from 0 to 1000
	float LO = 0;
	float HI = 1000;
	allocate_generate(&X, &Y, &Z, LO, HI, vectorSize);
	assert(X != NULL && Y != NULL && Z != NULL);

	//Write data to device
	////////////// Exercise 1 Step 2.9
	// Each enqueue is checked on its own so that a failure of the first
	// write is not masked by the status of the second one.
	err = clEnqueueWriteBuffer(queue, kernelIn, CL_FALSE, 0, vectorBytes, X, 0, NULL, NULL);
	assert(err==CL_SUCCESS);
	err = clEnqueueWriteBuffer(queue, kernelIn2, CL_FALSE, 0, vectorBytes, Y, 0, NULL, NULL);
	assert(err==CL_SUCCESS);
	// The writes are non-blocking; wait for them before X and Y are reused.
	err = clFinish(queue);
	assert(err==CL_SUCCESS);

#ifndef EXERCISE1
	// create the kernel
	const char *kernel_name = "SimpleKernel";

	size_t lengths = 0;
	unsigned char* binaries = get_binary("SimpleKernel.aocx", &lengths);
	assert(binaries != NULL);
	cl_int kernel_status;

	// Create the Program from the AOCX file.
	////////////////////// Exercise 2 Step 2.3    ///////////////////
	cl_program program = clCreateProgramWithBinary(context, 1, &myDevice, &lengths, (const unsigned char**)&binaries, &kernel_status, &err);
	assert(err==CL_SUCCESS);
	assert(kernel_status==CL_SUCCESS);

	// build the program
	//////////////      Compile the Kernel.... For Altera, nothing is done here, but this conforms to the standard
	//////////////       Exercise 2   Step 2.4    ///////////////////
	err = clBuildProgram(program, 1, &myDevice, "", NULL, NULL);
	assert(err==CL_SUCCESS);

	// create the kernel
	//////////////       Find Kernel in Program
	//////////////       Exercise 2   Step 2.5    ///////////////////
	cl_kernel kernel = clCreateKernel(program, kernel_name, &err);
	assert(err==CL_SUCCESS);

	//////////////     Set Arguments to the Kernels
	//////////////       Exercise 2   Step 2.6    ///////////////////
	err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&kernelIn);
	assert(err==CL_SUCCESS);
	err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&kernelIn2);
	assert(err==CL_SUCCESS);
	err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&kernelOut);
	assert(err==CL_SUCCESS);

	printf("\nLaunching the kernel...\n");

	// launch kernel
	//////////////       Exercise 2   Step 2.7    ///////////////////
	err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &vectorSize, &workSize, 0, NULL, NULL);
	assert(err==CL_SUCCESS);
	err = clFinish(queue);
	assert(err==CL_SUCCESS);

	// read the output
	//////////////       Exercise 2   Step 2.8    ///////////////////
	err = clEnqueueReadBuffer(queue, kernelOut, CL_TRUE, 0, vectorBytes, Z, 0, NULL, NULL);
	assert(err==CL_SUCCESS);

	float *CalcZ = (float *)malloc(sizeof(float) * vectorSize);
	assert(CalcZ != NULL);

	const float *hostX = (const float *)X;
	const float *hostY = (const float *)Y;
	for (size_t i = 0; i < vectorSize; i++)
	{
		//////////////  Equivalent Code running on CPUs
		//////////////       Exercise 2   Step 2.9    ///////////////////
		CalcZ[i] = sin(hostX[i] + hostY[i]);
	}

	//Print Performance Results
	// A failed comparison is reflected in the process exit status.
	if (!verification(X, Y, Z, CalcZ, vectorSize))
		exitCode = 1;

	// Clean up the objects that only exist when the kernel is run
	if(kernel)
		clReleaseKernel(kernel);
	if(program)
		clReleaseProgram(program);
	free(binaries);
	free(CalcZ);
#endif

	// Clean up the objects shared by both exercises
	if(kernelIn)
		clReleaseMemObject(kernelIn);
	if(kernelIn2)
		clReleaseMemObject(kernelIn2);
	if(kernelOut)
		clReleaseMemObject(kernelOut);
	if(queue)
		clReleaseCommandQueue(queue);
	if(context)
		clReleaseContext(context);
	free(X);
	free(Y);
	free(Z);

	return exitCode;
}

下面是辅助函数所在的 utility.c 文件。

// This file
#include "utility.h"
#include <math.h>


// 
/*
 * Reads the whole file `name` into a newly allocated buffer.
 *
 * name   - path of the file to read; it is opened in binary mode.
 * length - receives the number of bytes read.
 *
 * Returns a malloc()ed buffer holding the file contents; the caller owns it
 * and must free() it.  The buffer is non-NULL even for an empty file.
 * Failures (file cannot be opened, size cannot be determined, out of memory,
 * short read) are reported through assert().
 */
unsigned char* get_binary(const char * name, size_t* length)
{
	FILE *fp = fopen(name, "rb");
	assert (fp != NULL);

	// Determine the file size by seeking to the end.
	const int seekStatus = fseek (fp, 0, SEEK_END);
	assert (seekStatus == 0);
	(void)seekStatus;

	// ftell returns -1 on error, which must not be turned into a huge size_t.
	const long fileSize = ftell (fp);
	assert (fileSize >= 0);
	const size_t byteCount = (size_t)fileSize;

	// Allocate at least one byte so an empty file still yields a valid pointer.
	unsigned char *binaries = (unsigned char*)malloc(byteCount > 0 ? byteCount : 1);
	assert (binaries != NULL);

	rewind (fp);
	// Read byte-wise so the return value is the exact number of bytes read.
	const size_t bytesRead = fread (binaries, 1, byteCount, fp);
	assert (bytesRead == byteCount);
	(void)bytesRead;

	fclose (fp);
	*length = byteCount;
	return binaries;
}


void print_platform_info(cl_platform_id* myPlatform)
{
	cl_int err;
	//Grab Platform Info
	char myPlatformName[128];
	char myPlatformProfile[128];
	char myPlatformVersion[128];
	char myPlatformVendor[128];
	err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_NAME, 128 * sizeof(char), myPlatformName, NULL);
	err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_PROFILE, 128 * sizeof(char), myPlatformProfile, NULL);
	err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_VERSION, 128 * sizeof(char), myPlatformVersion, NULL);
	err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_VENDOR, 128 * sizeof(char), myPlatformVendor, NULL);
	printf ("\nPlatform Name: %s\n", myPlatformName);
	printf ("Platform Profile: %s\n", myPlatformProfile);
	printf ("Platform Version: %s\n", myPlatformVersion);
	printf ("Platform Vendor: %s\n", myPlatformVendor);
}


void print_device_info(cl_device_id* myDevice)
{
	cl_int err;
	//Get Device Properties
	char myDeviceVendor[128];
	cl_uint myDeviceMaxCU;
	cl_uint myDeviceMaxWID;
	char myDeviceName[128];
	char myDeviceVersion[128];
	cl_bool myDeviceAvailable;
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_NAME, 128 * sizeof(char), myDeviceName, NULL);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_VENDOR, 128 * sizeof(char), myDeviceVendor, NULL);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &myDeviceMaxCU, NULL);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &myDeviceMaxWID, NULL);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_VERSION, 128 * sizeof(char), myDeviceVersion, NULL);
	err = clGetDeviceInfo(*myDevice, CL_DEVICE_AVAILABLE, sizeof(cl_bool), &myDeviceAvailable, NULL);
	printf ("\nDevice Name: %s\n", myDeviceName);
	printf ("Device Vendor: %s\n", myDeviceVendor);
	printf ("Device Version: %s\n", myDeviceVersion);
	printf ("Device Available: %d\n", myDeviceAvailable);
	printf ("Device Max Compute Units: %d\n", myDeviceMaxCU);
	printf ("Device Max Work Item Dimensions: %d\n", myDeviceMaxWID);
}

/*
 * Allocates the two input vectors and the output vector and fills the inputs
 * with pseudo-random values.
 *
 * X, Y       - receive newly allocated arrays of vectorSize floats, each
 *              element set to a rand()-derived value between LO and HI.
 * Z          - receives a newly allocated, zero-filled array of vectorSize
 *              floats.
 * LO, HI     - bounds of the generated values.
 * vectorSize - number of elements in each array.
 *
 * The caller owns all three arrays and must free() them.  Allocation failure
 * is reported through assert().
 */
void allocate_generate(void** X, void** Y, void** Z, float LO, float HI, size_t vectorSize)
{
	// calloc rejects element counts whose byte size would overflow; at least
	// one element is requested so a zero-length call still returns valid
	// pointers.
	const size_t elementCount = vectorSize > 0 ? vectorSize : 1;
	float *inputX = (float *)calloc(elementCount, sizeof(float));
	float *inputY = (float *)calloc(elementCount, sizeof(float));
	float *output = (float *)calloc(elementCount, sizeof(float));
	assert(inputX != NULL && inputY != NULL && output != NULL);

	//Assigns random number from LO to HI to all locations of X and Y
	const float scale = (float)RAND_MAX / (HI - LO);
	for (size_t i = 0; i < vectorSize; ++i) {
		// X is drawn before Y for every index, keeping the rand() sequence
		// in the same order as before.
		inputX[i] = LO + (float)rand() / scale;
		inputY[i] = LO + (float)rand() / scale;
	}

	*X = inputX;
	*Y = inputY;
	*Z = output;
}

/*
 * Compares the OpenCL result against the host reference and prints a report.
 *
 * X, Y       - the two input vectors (arrays of vectorSize floats).
 * Z          - the result read back from the device.
 * CalcZ      - the reference result computed on the host.
 * vectorSize - number of elements in every array.
 *
 * Returns false, after printing the first mismatching element, as soon as an
 * element of Z differs from CalcZ by more than EPSILON (a NaN difference
 * also counts as a mismatch).  Otherwise prints a few evenly spaced sample
 * results and returns true.
 */
bool verification (void * X, void * Y, void * Z, void * CalcZ, size_t vectorSize)
{
	const float *input1 = (const float *)X;
	const float *input2 = (const float *)Y;
	const float *deviceResult = (const float *)Z;
	const float *hostResult = (const float *)CalcZ;

	//Verify if OpenCL Calculation is Same as C Result
	// Every element is checked.  The previous bound of vectorSize-4 skipped
	// the last four elements and wrapped around for vectors shorter than
	// four elements.
	for (size_t i = 0; i < vectorSize; i++) {
		// Written as a negated <= so that a NaN difference fails the check.
		if (!(fabs(hostResult[i] - deviceResult[i]) <= EPSILON)) {
			 printf("\nVERIFICATION FAILED! index %d, X:%f, Y:%f, OpenCL Result:%f != Result %f)",
				  (int)i, input1[i], input2[i], deviceResult[i], hostResult[i]);
			 return false;
		}
	}

	// Print a few evenly spaced sample results to Standard Out
	printf("\n\nVERIFICATION PASSED!!!\n\nSome Sample of Results\n");
	printf("------------------------------------\n");
	// A step of at least one keeps the loop finite for vectors shorter than
	// five elements.
	size_t sampleStep = vectorSize / 5;
	if (sampleStep == 0)
		sampleStep = 1;
	for (size_t i = 0; i < vectorSize; i += sampleStep) {
		printf("Index %d: Input 1 is %f, Input 2 is %f, Result is %f\n", (int)i, input1[i], input2[i], deviceResult[i]);
	}
	return true;
}

需要原工程可以留下邮箱。

OpenCL实例

React组件设计模式之组合组件应用实例分析

php中注册器模式类用法实例分析

Python装饰器使用实例：验证参数合法性

Android实现长图文截图功能实例代码

smarty简单应用实例

Python简单进程锁代码实例

vue+VeeValidate 校验范围实例详解（部分校验,全部校验）

Android中的相对路径实例详解

Tab切换组件（选项卡功能）实例代码

vue-quill-editor+plupload富文本编辑器实例详解