Nav apraksta

vecadd2.c 3.4KB

  1. //Includes
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <iostream>
  5. #ifdef __APPLE__
  6. #include <OpenCL/opencl.h>
  7. #else
  8. #include <CL/cl.h>
  9. #endif
  /* Number of float elements in each input vector and in the result vector. */
  #define DATA_SIZE 10
  /* NOTE(review): nothing visible in this file relies on an unqualified std
   * name; kept as-is in case other code does -- confirm before removing. */
  using namespace std;

  /*
   * OpenCL C source for the "add" kernel.
   *
   * Each work-item reads its own global index and computes
   *     output[id] = inputA[id] + 2 * inputB[id]
   * The kernel does no bounds checking, so the global work size must not
   * exceed the number of elements in the three buffers.
   *
   * The scale factor is written as the single-precision literal 2.0f: a bare
   * 2.0 is a double literal, which drags double-precision arithmetic into a
   * float-only kernel and is warned about or rejected on devices that lack
   * cl_khr_fp64.
   */
  const char *ProgramSource =
      "__kernel void add(__global float *inputA, __global float *inputB, __global float *output)\n"
      "{\n"
      " size_t id = get_global_id(0);\n"
      " output[id] = inputA[id] + 2.0f*inputB[id];\n"
      "}\n";
  18. int main(void)
  19. {
  20. cl_context context;
  21. cl_context_properties properties[3];
  22. cl_kernel kernel;
  23. cl_command_queue command_queue;
  24. cl_program program;
  25. cl_int err;
  26. cl_uint num_of_platforms=0;
  27. cl_platform_id platform_id;
  28. cl_device_id device_id;
  29. cl_uint num_of_devices=0;
  30. cl_mem inputA, inputB, output;
  31. size_t global;
  32. float inputDataA[DATA_SIZE]={1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  33. float inputDataB[DATA_SIZE]={1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  34. float results[DATA_SIZE]={0};
  35. int i;
  36. // retreive a list of platforms avaible
  37. if (clGetPlatformIDs(1, &platform_id, &num_of_platforms)!= CL_SUCCESS)
  38. {
  39. printf("Unable to get platform_id\n");
  40. return 1;
  41. }
  42. // try to get a supported GPU device
  43. if (clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices) != CL_SUCCESS)
  44. {
  45. printf("Unable to get device_id\n");
  46. return 1;
  47. }
  48. // context properties list - must be terminated with 0
  49. properties[0]= CL_CONTEXT_PLATFORM;
  50. properties[1]= (cl_context_properties) platform_id;
  51. properties[2]= 0;
  52. // create a context with the GPU device
  53. context = clCreateContext(properties,1,&device_id,NULL,NULL,&err);
  54. // create command queue using the context and device
  55. command_queue = clCreateCommandQueue(context, device_id, 0, &err);
  56. // create a program from the kernel source code
  57. program = clCreateProgramWithSource(context,1,(const char **) &ProgramSource, NULL, &err);
  58. // compile the program
  59. if (clBuildProgram(program, 0, NULL, NULL, NULL, NULL) != CL_SUCCESS)
  60. {
  61. printf("Error building program\n");
  62. return 1;
  63. }
  64. // specify which kernel from the program to execute
  65. kernel = clCreateKernel(program, "add", &err);
  66. // create buffers for the input and ouput
  67. inputA = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL);
  68. inputB = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL);
  69. output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL);
  70. // load data into the input buffer
  71. clEnqueueWriteBuffer(command_queue, inputA, CL_TRUE, 0, sizeof(float) * DATA_SIZE, inputDataA, 0, NULL, NULL);
  72. clEnqueueWriteBuffer(command_queue, inputB, CL_TRUE, 0, sizeof(float) * DATA_SIZE, inputDataB, 0, NULL, NULL);
  73. // set the argument list for the kernel command
  74. clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputA);
  75. clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputB);
  76. clSetKernelArg(kernel, 2, sizeof(cl_mem), &output);
  77. global=DATA_SIZE;
  78. // enqueue the kernel command for execution
  79. clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
  80. clFinish(command_queue);
  81. // copy the results from out of the output buffer
  82. clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sizeof(float) *DATA_SIZE, results, 0, NULL, NULL);
  83. // print the results
  84. printf("output: ");
  85. for(i=0;i<DATA_SIZE; i++)
  86. {
  87. printf("%f ",results[i]);
  88. }
  89. printf("\n");
  90. // cleanup - release OpenCL resources
  91. clReleaseMemObject(inputA);
  92. clReleaseMemObject(inputB);
  93. clReleaseMemObject(output);
  94. clReleaseProgram(program);
  95. clReleaseKernel(kernel);
  96. clReleaseCommandQueue(command_queue);
  97. clReleaseContext(context);
  98. return 0;
  99. }