Hello OpenCL

Parallel/OpenCL

Hello OpenCL

Houkibosi 2013. 10. 8. 17:02

// Example program: takes input[2] = {1, 2} and, via an OpenCL kernel, produces output[2] = {2, 1} (the two elements swapped).

// test platform : Visual Studio 2008 + ati stream sdk beta2.0

#include <stdio.h>

#include <stdlib.h>

#include <CL/cl.h>

/*
 * Error-check helper.
 *
 * If `retcode` is not CL_SUCCESS, prints `msg` followed by a newline and
 * jumps to the label `exit_label` (normally a shared cleanup label).
 *
 *   retcode    - status value to test against CL_SUCCESS
 *   msg        - C string describing the call that failed
 *   exit_label - label in the calling function to jump to on failure
 *
 * Wrapped in do { } while (0) so that `CHECK_CL_ERROR(...);` behaves as a
 * single statement, including directly before an `else`.
 * `msg` is printed through "%s" so it is never interpreted as a format string.
 */
#define CHECK_CL_ERROR(retcode, msg, exit_label) \
    do { \
        if ((retcode) != CL_SUCCESS) { \
            printf("%s\n", (msg)); \
            goto exit_label; \
        } \
    } while (0)

/*
 * Minimal OpenCL host program.
 *
 * Creates a CPU context, builds a tiny kernel from source, runs it once with
 * a single work-item so that output[] receives the two elements of input[]
 * in swapped order, reads the result back and prints both arrays.
 *
 * Returns EXIT_SUCCESS when every OpenCL call succeeded and the result was
 * read back, EXIT_FAILURE otherwise. The arrays are printed in both cases.
 */
int main(void)
{
    /*
     * All locals are declared and initialised up front so that no
     * `goto EXIT` can jump over an initialisation, and the cleanup code
     * only ever sees either NULL or a valid handle.
     */

    /* Kernel source: writes the two input elements to output, swapped. */
    const char *program_source =
        "__kernel void test(__global int * output, "
        " __global int * input) "
        "{"
        " output[0] = input[1];"
        " output[1] = input[0];"
        "}";

    cl_int input[2] = {1, 2};
    cl_int output[2] = {0};

    cl_context context = NULL;
    cl_device_id *devices = NULL;
    cl_mem inputBuffer = NULL;
    cl_mem outputBuffer = NULL;
    cl_command_queue cmd_queue = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_int status = 0;

    size_t devices_size = 0;    /* size of the device-id list, in bytes */
    size_t num_devices = 0;     /* number of devices in the context */

    /* ND range: one work-group containing one work-item. */
    size_t global_work_size = 1;
    size_t local_work_size = 1;

    int exit_code = EXIT_FAILURE;
    int i;

    /*
     * 1. Create a context for CPU devices.
     * NOTE(review): a NULL properties list leaves the platform choice to the
     * implementation; some installations may require an explicit
     * CL_CONTEXT_PLATFORM entry here - confirm on the target system.
     */
    context = clCreateContextFromType(NULL, CL_DEVICE_TYPE_CPU, NULL, NULL, &status);
    CHECK_CL_ERROR(status, "clCreateContextFromType", EXIT);
    printf("get CPU context OK!\n");

    /*
     * 2. Ask how large the context's device list is. The value returned is
     * a size in BYTES, so the device count is obtained by dividing by the
     * size of one cl_device_id.
     */
    status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &devices_size);
    CHECK_CL_ERROR(status, "clGetContextInfo", EXIT);

    num_devices = devices_size / sizeof(cl_device_id);
    printf("device num : %lu\n", (unsigned long)num_devices);
    if (num_devices == 0) {
        goto EXIT;
    }

    /* 3. Allocate storage for the device ids (cast kept so the file also builds as C++). */
    devices = (cl_device_id *)malloc(devices_size);
    if (devices == NULL) {
        printf("malloc\n");
        goto EXIT;
    }

    /* 4. Fetch the device ids themselves. */
    status = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size, devices, NULL);
    CHECK_CL_ERROR(status, "clGetContextInfo", EXIT);

    /*
     * 5. Create a command queue. A queue belongs to one device; this example
     * simply uses the first device of the context.
     */
    cmd_queue = clCreateCommandQueue(context, devices[0], 0, &status);
    CHECK_CL_ERROR(status, "clCreateCommandQueue", EXIT);

    /* 6. Kernel input buffer, backed directly by the host array `input`. */
    inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                 sizeof(input), input, &status);
    CHECK_CL_ERROR(status, "clCreateBuffer", EXIT);

    /* 7. Kernel output buffer, backed directly by the host array `output`. */
    outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
                                  sizeof(output), output, &status);
    CHECK_CL_ERROR(status, "clCreateBuffer", EXIT);

    /*
     * Summary so far:
     *   - work is submitted through a command queue;
     *   - a command queue is created per device;
     *   - kernel inputs/outputs are buffers created through the OpenCL API.
     */

    /* 8-9. Create a program object from the kernel source string. */
    printf("create a OpenCL program using the kernel source\n");
    program = clCreateProgramWithSource(context, 1, &program_source, NULL, &status);
    CHECK_CL_ERROR(status, "clCreateProgramWithSource", EXIT);

    /* 10. Compile the program for the first device. */
    printf("building program\n");
    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
    CHECK_CL_ERROR(status, "clBuildProgram", EXIT);

    /* 11. Create a kernel object for the function named "test". */
    printf("create kernel\n");
    kernel = clCreateKernel(program, "test", &status);
    CHECK_CL_ERROR(status, "clCreateKernel", EXIT);

    /*
     * 12-13. Bind the kernel arguments; the order matches the kernel
     * signature: argument 0 is `output`, argument 1 is `input`.
     */
    printf("set kernel arguments\n");

    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&outputBuffer);
    CHECK_CL_ERROR(status, "clSetKernelArg", EXIT);

    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&inputBuffer);
    CHECK_CL_ERROR(status, "clSetKernelArg", EXIT);

    /* 14. Enqueue the kernel on the device's queue with the 1-D range above. */
    printf("{ executing kernels... (enqueue kernels)\n");
    status = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL,
                                    &global_work_size, &local_work_size,
                                    0, NULL, NULL);
    CHECK_CL_ERROR(status, "clEnqueueNDRangeKernel", EXIT);
    printf("} executing kernels... (enqueue kernels)\n");

    /*
     * 15. Blocking read (CL_TRUE): the call returns only once the data has
     * been copied. The whole output array is read, not just its first element.
     */
    status = clEnqueueReadBuffer(cmd_queue, outputBuffer, CL_TRUE, 0,
                                 sizeof(output), output,
                                 0, NULL, NULL);
    CHECK_CL_ERROR(status, "clEnqueueReadBuffer", EXIT);

    exit_code = EXIT_SUCCESS;

EXIT:
    /* 16. Release every resource that was actually created. */
    if (inputBuffer != NULL) {
        clReleaseMemObject(inputBuffer);
    }
    if (outputBuffer != NULL) {
        clReleaseMemObject(outputBuffer);
    }
    if (kernel != NULL) {
        clReleaseKernel(kernel);
    }
    if (program != NULL) {
        clReleaseProgram(program);
    }
    if (cmd_queue != NULL) {
        clReleaseCommandQueue(cmd_queue);
    }
    if (context != NULL) {
        clReleaseContext(context);
    }
    free(devices);  /* free(NULL) is a no-op */

    /* 17. Print the input and the result. */
    printf("\n input : \n \t");
    for (i = 0; i < 2; i++) {
        printf("%d ", input[i]);
    }

    printf("\n output : \n \t");
    for (i = 0; i < 2; i++) {
        printf("%d ", output[i]);
    }
    printf("\n");

    return exit_code;
}

출처 : http://cafe.naver.com/opencl/12

저작자표시 (새창열림)