OpenCL实例
程序员文章站
2022-07-12 21:31:20
...
OpenCL培训实例,下面是Main函数。
#include <math.h>
#include "CL\opencl.h"
#include "utility.h"
// Number of float elements in each input/output vector; also passed as the global work size of the kernel launch.
static const size_t vectorSize = 4096; // must be evenly divisible by workSize
// Local work-group size passed to clEnqueueNDRangeKernel.
static const size_t workSize = 256;
// Uncomment to build only the Exercise 1 part of main (everything inside "#ifndef EXERCISE1" is skipped).
//#define EXERCISE1
/*
 * Host side of the OpenCL training example.
 *
 * Takes the first platform and the first device on it, creates a context, a
 * command queue and three device buffers, and uploads two random input vectors.
 * Unless EXERCISE1 is defined it then loads the "SimpleKernel" kernel from
 * SimpleKernel.aocx, runs it over vectorSize work-items, reads the result back
 * and compares it against sin(X[i] + Y[i]) computed on the host.
 *
 * Returns 0 on success (when the kernel is run: only if verification passed)
 * and 1 if verification failed. Setup errors abort through assert, which is the
 * error-handling convention used throughout this example.
 */
int main(void)
{
    cl_int err;
    int exitCode = 0;

    // The NDRange launch below requires the global size to be a multiple of the local size.
    assert(workSize != 0 && vectorSize % workSize == 0);

    //Setup Platform
    cl_uint myPlatformCount = 0;
    ////////////// Exercise 1 Step 2.2
    err = clGetPlatformIDs(0, NULL, &myPlatformCount);
    assert(err==CL_SUCCESS);
    assert(myPlatformCount > 0);

    //Get Platform ID (only the first platform is used)
    cl_platform_id myPlatform;
    ////////////// Exercise 1 Step 2.3
    err = clGetPlatformIDs(1, &myPlatform, NULL);
    assert(err==CL_SUCCESS);
    print_platform_info(&myPlatform);

    //Setup Device
    cl_uint myDeviceCount = 0;
    ////////////// Exercise 1 Step 2.4
    err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &myDeviceCount);
    assert(err==CL_SUCCESS);
    assert(myDeviceCount > 0);

    //Get Device ID (only the first device is used)
    cl_device_id myDevice;
    ////////////// Exercise 1 Step 2.5
    err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 1, &myDevice, NULL);
    assert(err==CL_SUCCESS);
    print_device_info(&myDevice);

    //Create Context
    ////////////// Exercise 1 Step 2.6
    cl_context context = clCreateContext(0, 1, &myDevice, NULL, NULL, &err);
    assert(err==CL_SUCCESS);

    //Create Command queue
    ////////////// Exercise 1 Step 2.7
    cl_command_queue queue = clCreateCommandQueue(context, myDevice, 0, &err);
    assert(err==CL_SUCCESS);

    //Create Buffers for input and output
    ////////////// Exercise 1 Step 2.8
    const size_t vectorBytes = sizeof(cl_float) * vectorSize;
    cl_mem kernelIn = clCreateBuffer(context, CL_MEM_READ_ONLY, vectorBytes, NULL, &err);
    assert(err==CL_SUCCESS);
    cl_mem kernelIn2 = clCreateBuffer(context, CL_MEM_READ_ONLY, vectorBytes, NULL, &err);
    assert(err==CL_SUCCESS);
    cl_mem kernelOut = clCreateBuffer(context, CL_MEM_WRITE_ONLY, vectorBytes, NULL, &err);
    assert(err==CL_SUCCESS);

    //Inputs and Outputs to Kernel, X and Y are inputs, Z is output
    void *X = NULL, *Y = NULL, *Z = NULL;
    //Fills X and Y with random values from 0 to 1000
    float LO= 0; float HI=1000;
    allocate_generate(&X, &Y, &Z, LO, HI, vectorSize);
    assert(X != NULL && Y != NULL && Z != NULL);

    //Write data to device
    ////////////// Exercise 1 Step 2.9
    // Each enqueue is checked on its own so a failure of the first write is not
    // hidden by the status of the second one.
    err = clEnqueueWriteBuffer(queue, kernelIn, CL_FALSE, 0, vectorBytes, X, 0, NULL, NULL);
    assert(err==CL_SUCCESS);
    err = clEnqueueWriteBuffer(queue, kernelIn2, CL_FALSE, 0, vectorBytes, Y, 0, NULL, NULL);
    assert(err==CL_SUCCESS);
    // The writes are non-blocking; wait for them before X and Y are touched again.
    err = clFinish(queue);
    assert(err==CL_SUCCESS);

#ifndef EXERCISE1
    // create the kernel
    const char *kernel_name = "SimpleKernel";
    size_t lengths = 0;
    unsigned char* binaries = get_binary("SimpleKernel.aocx", &lengths);
    assert(binaries != NULL);
    cl_int kernel_status;

    // Create the Program from the AOCX file.
    ////////////////////// Exercise 2 Step 2.3 ///////////////////
    cl_program program = clCreateProgramWithBinary(context, 1, &myDevice, &lengths, (const unsigned char**)&binaries, &kernel_status, &err);
    assert(err==CL_SUCCESS);
    assert(kernel_status==CL_SUCCESS);

    // build the program
    ////////////// Compile the Kernel.... For Altera, nothing is done here, but this conforms to the standard
    ////////////// Exercise 2 Step 2.4 ///////////////////
    err = clBuildProgram(program, 1, &myDevice, "", NULL, NULL);
    assert(err==CL_SUCCESS);

    ////////////// Find Kernel in Program
    ////////////// Exercise 2 Step 2.5 ///////////////////
    cl_kernel kernel = clCreateKernel(program, kernel_name, &err);
    assert(err==CL_SUCCESS);

    ////////////// Set Arguments to the Kernels
    ////////////// Exercise 2 Step 2.6 ///////////////////
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&kernelIn);
    assert(err==CL_SUCCESS);
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&kernelIn2);
    assert(err==CL_SUCCESS);
    err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&kernelOut);
    assert(err==CL_SUCCESS);

    printf("\nLaunching the kernel...\n");

    // launch kernel
    ////////////// Exercise 2 Step 2.7 ///////////////////
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &vectorSize, &workSize, 0, NULL, NULL);
    assert(err==CL_SUCCESS);
    err = clFinish(queue);
    assert(err==CL_SUCCESS);

    // read the output (blocking read, so Z is valid as soon as the call returns)
    ////////////// Exercise 2 Step 2.8 ///////////////////
    err = clEnqueueReadBuffer(queue, kernelOut, CL_TRUE, 0, vectorBytes, Z, 0, NULL, NULL);
    assert(err==CL_SUCCESS);

    // Host-side reference result
    float *CalcZ = (float*)malloc(sizeof(float)*vectorSize);
    assert(CalcZ != NULL);
    for (size_t i = 0; i < vectorSize; i++)
    {
        ////////////// Equivalent Code running on CPUs
        ////////////// Exercise 2 Step 2.9 ///////////////////
        CalcZ[i]=sin(((float*) X)[i] + ((float*) Y)[i]);
    }

    //Compare device result with host result and print a few samples
    if (!verification (X, Y, Z, CalcZ, vectorSize))
        exitCode = 1;

    // Clean up the objects that only exist when the kernel is run
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    free (binaries);
    free (CalcZ);
#endif

    // Clean up everything created before the #ifndef, so an EXERCISE1-only
    // build releases its resources as well.
    clReleaseMemObject(kernelIn);
    clReleaseMemObject(kernelIn2);
    clReleaseMemObject(kernelOut);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
    free (X);
    free (Y);
    free (Z);

    return exitCode;
}
下面是子函数utility.c文件。
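// utility.c - helper routines for the OpenCL example: kernel binary loading, platform/device info printing, test data generation and result verification.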
#include "utility.h"
#include <math.h>
//
/*
 * Reads the whole file `name` into a freshly allocated buffer.
 *
 * name:   path of the file to load (opened in binary mode).
 * length: receives the number of bytes read; set to 0 on failure.
 *
 * Returns the buffer, which the caller must release with free(), or NULL if
 * the file cannot be opened, its size cannot be determined, memory cannot be
 * allocated or the file cannot be read completely. A diagnostic is written to
 * stderr in each failure case.
 */
unsigned char* get_binary(const char * name, size_t* length)
{
    *length = 0;

    FILE *fp = fopen(name, "rb");
    if (fp == NULL) {
        fprintf(stderr, "get_binary: cannot open '%s'\n", name);
        return NULL;
    }

    // Determine the file size; ftell reports -1 on error, which must not reach malloc.
    long fileSize = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        fileSize = ftell(fp);
    if (fileSize < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        fprintf(stderr, "get_binary: cannot determine size of '%s'\n", name);
        fclose(fp);
        return NULL;
    }

    const size_t byteCount = (size_t)fileSize;
    // Allocate at least one byte so an empty file still yields a non-NULL buffer.
    unsigned char *binaries = (unsigned char*)malloc(byteCount > 0 ? byteCount : 1);
    if (binaries == NULL) {
        fprintf(stderr, "get_binary: out of memory reading '%s'\n", name);
        fclose(fp);
        return NULL;
    }

    // Read byte-wise so a short read is detectable from the return value.
    if (fread(binaries, 1, byteCount, fp) != byteCount) {
        fprintf(stderr, "get_binary: short read on '%s'\n", name);
        free(binaries);
        fclose(fp);
        return NULL;
    }

    fclose(fp);
    *length = byteCount;
    return binaries;
}
/*
 * Prints the name, profile, version and vendor of an OpenCL platform to stdout.
 *
 * myPlatform: pointer to the platform id to describe.
 *
 * Every buffer starts out holding a placeholder and is force-terminated after
 * its query, so the printf calls never read uninitialised or unterminated
 * memory even if a query fails. A failed query is expected to leave the
 * placeholder in place (NOTE(review): the OpenCL spec does not state what the
 * output buffer contains after a failed call).
 */
void print_platform_info(cl_platform_id* myPlatform)
{
    //Grab Platform Info
    char myPlatformName[128] = "(unavailable)";
    char myPlatformProfile[128] = "(unavailable)";
    char myPlatformVersion[128] = "(unavailable)";
    char myPlatformVendor[128] = "(unavailable)";

    (void)clGetPlatformInfo(*myPlatform, CL_PLATFORM_NAME, sizeof myPlatformName, myPlatformName, NULL);
    (void)clGetPlatformInfo(*myPlatform, CL_PLATFORM_PROFILE, sizeof myPlatformProfile, myPlatformProfile, NULL);
    (void)clGetPlatformInfo(*myPlatform, CL_PLATFORM_VERSION, sizeof myPlatformVersion, myPlatformVersion, NULL);
    (void)clGetPlatformInfo(*myPlatform, CL_PLATFORM_VENDOR, sizeof myPlatformVendor, myPlatformVendor, NULL);

    // Guarantee termination regardless of what the queries wrote.
    myPlatformName[sizeof myPlatformName - 1] = '\0';
    myPlatformProfile[sizeof myPlatformProfile - 1] = '\0';
    myPlatformVersion[sizeof myPlatformVersion - 1] = '\0';
    myPlatformVendor[sizeof myPlatformVendor - 1] = '\0';

    printf ("\nPlatform Name: %s\n", myPlatformName);
    printf ("Platform Profile: %s\n", myPlatformProfile);
    printf ("Platform Version: %s\n", myPlatformVersion);
    printf ("Platform Vendor: %s\n", myPlatformVendor);
}
/*
 * Prints name, vendor, version, availability, compute-unit count and maximum
 * work-item dimensions of an OpenCL device to stdout.
 *
 * myDevice: pointer to the device id to describe.
 *
 * All outputs are given defaults before the queries (placeholder text for the
 * strings, 0 for the numbers) and the strings are force-terminated afterwards,
 * so nothing uninitialised is printed if a query fails.
 */
void print_device_info(cl_device_id* myDevice)
{
    //Get Device Properties
    char myDeviceName[128] = "(unavailable)";
    char myDeviceVendor[128] = "(unavailable)";
    char myDeviceVersion[128] = "(unavailable)";
    cl_uint myDeviceMaxCU = 0;
    cl_uint myDeviceMaxWID = 0;
    cl_bool myDeviceAvailable = 0;

    (void)clGetDeviceInfo(*myDevice, CL_DEVICE_NAME, sizeof myDeviceName, myDeviceName, NULL);
    (void)clGetDeviceInfo(*myDevice, CL_DEVICE_VENDOR, sizeof myDeviceVendor, myDeviceVendor, NULL);
    (void)clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &myDeviceMaxCU, NULL);
    (void)clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &myDeviceMaxWID, NULL);
    (void)clGetDeviceInfo(*myDevice, CL_DEVICE_VERSION, sizeof myDeviceVersion, myDeviceVersion, NULL);
    (void)clGetDeviceInfo(*myDevice, CL_DEVICE_AVAILABLE, sizeof(cl_bool), &myDeviceAvailable, NULL);

    // Guarantee termination regardless of what the queries wrote.
    myDeviceName[sizeof myDeviceName - 1] = '\0';
    myDeviceVendor[sizeof myDeviceVendor - 1] = '\0';
    myDeviceVersion[sizeof myDeviceVersion - 1] = '\0';

    printf ("\nDevice Name: %s\n", myDeviceName);
    printf ("Device Vendor: %s\n", myDeviceVendor);
    printf ("Device Version: %s\n", myDeviceVersion);
    // The values are unsigned, so they are printed with %u rather than %d.
    printf ("Device Available: %u\n", (unsigned)myDeviceAvailable);
    printf ("Device Max Compute Units: %u\n", (unsigned)myDeviceMaxCU);
    printf ("Device Max Work Item Dimensions: %u\n", (unsigned)myDeviceMaxWID);
}
/*
 * Allocates three float vectors of vectorSize elements and fills the two
 * input vectors with pseudo-random values.
 *
 * X, Y:       receive the input vectors, filled with rand()-based values
 *             scaled into the range LO..HI.
 * Z:          receives the output vector; its contents are left uninitialised.
 * LO, HI:     bounds of the generated values.
 * vectorSize: number of float elements per vector.
 *
 * The caller owns all three buffers and releases them with free(). If any
 * allocation fails, nothing is leaked and *X, *Y and *Z are all set to NULL.
 */
void allocate_generate(void** X, void** Y, void** Z, float LO, float HI, size_t vectorSize)
{
    // allocate the input and output vectors
    float *inputX = (float *)malloc(sizeof(float)*vectorSize);
    float *inputY = (float *)malloc(sizeof(float)*vectorSize);
    float *output = (float *)malloc(sizeof(float)*vectorSize);

    if (inputX == NULL || inputY == NULL || output == NULL) {
        // All-or-nothing: never hand back a partially allocated set.
        free(inputX);
        free(inputY);
        free(output);
        *X = NULL;
        *Y = NULL;
        *Z = NULL;
        return;
    }

    //Assigns a random number from LO to HI to every location of X and Y
    //(X[i] is drawn before Y[i], so the rand() sequence is consumed pairwise)
    for (size_t i = 0; i < vectorSize; ++i) {
        inputX[i] = LO + (float)rand()/((float)RAND_MAX/(HI-LO));
        inputY[i] = LO + (float)rand()/((float)RAND_MAX/(HI-LO));
    }

    *X = inputX;
    *Y = inputY;
    *Z = output;
}
/*
 * Compares the OpenCL result with the host reference and prints a summary.
 *
 * X, Y:       input vectors (float), used only for reporting.
 * Z:          result read back from the device (float).
 * CalcZ:      reference result computed on the host (float).
 * vectorSize: number of elements in each vector.
 *
 * Returns false, after printing the first offending index, as soon as an
 * element of Z differs from CalcZ by more than EPSILON (a NaN difference also
 * counts as a mismatch). Otherwise prints a handful of evenly spaced samples
 * and returns true.
 */
bool verification (void * X, void * Y, void * Z, void * CalcZ, size_t vectorSize)
{
    const float *in1 = (const float *)X;
    const float *in2 = (const float *)Y;
    const float *device = (const float *)Z;
    const float *host = (const float *)CalcZ;

    //Verify if OpenCL Calculation is Same as C Result.
    //Every element is checked, including the last four, and the bound cannot
    //wrap around for vectors shorter than four elements.
    for (size_t i = 0; i < vectorSize; i++) {
        // Written as !(diff <= EPSILON) so a NaN difference fails the check.
        if (!(fabs(host[i] - device[i]) <= EPSILON)) {
            printf("\nVERIFICATION FAILED! index %d, X:%f, Y:%f, OpenCL Result:%f != Result %f)",
                   (int)i, in1[i], in2[i], device[i], host[i]);
            return false;
        }
    }

    // Print a few evenly spaced samples to standard out
    printf("\n\nVERIFICATION PASSED!!!\n\nSome Sample of Results\n");
    printf("------------------------------------\n");
    // A step of 0 (vectorSize < 5) would never advance, so use at least 1.
    size_t step = vectorSize / 5;
    if (step == 0)
        step = 1;
    for (size_t i = 0; i < vectorSize; i += step) {
        printf("Index %d: Input 1 is %f, Input 2 is %f, Result is %f\n", (int)i, in1[i], in2[i], device[i]);
    }
    return true;
}
需要原工程可以留下邮箱。
上一篇: 初识opencl(1)