暂无描述

vecadd4.c 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #ifdef __APPLE__
  4. #include <OpenCL/opencl.h>
  5. #else
  6. #include <CL/cl.h>
  7. #endif
  8. #define MAX_SOURCE_SIZE (0x100000)
  9. const char *clErrorString(cl_int error)
  10. {
  11. switch(error){
  12. // run-time and JIT compiler errors
  13. case 0: return "CL_SUCCESS";
  14. case -1: return "CL_DEVICE_NOT_FOUND";
  15. case -2: return "CL_DEVICE_NOT_AVAILABLE";
  16. case -3: return "CL_COMPILER_NOT_AVAILABLE";
  17. case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  18. case -5: return "CL_OUT_OF_RESOURCES";
  19. case -6: return "CL_OUT_OF_HOST_MEMORY";
  20. case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
  21. case -8: return "CL_MEM_COPY_OVERLAP";
  22. case -9: return "CL_IMAGE_FORMAT_MISMATCH";
  23. case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
  24. case -11: return "CL_BUILD_PROGRAM_FAILURE";
  25. case -12: return "CL_MAP_FAILURE";
  26. case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
  27. case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  28. case -15: return "CL_COMPILE_PROGRAM_FAILURE";
  29. case -16: return "CL_LINKER_NOT_AVAILABLE";
  30. case -17: return "CL_LINK_PROGRAM_FAILURE";
  31. case -18: return "CL_DEVICE_PARTITION_FAILED";
  32. case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
  33. // compile-time errors
  34. case -30: return "CL_INVALID_VALUE";
  35. case -31: return "CL_INVALID_DEVICE_TYPE";
  36. case -32: return "CL_INVALID_PLATFORM";
  37. case -33: return "CL_INVALID_DEVICE";
  38. case -34: return "CL_INVALID_CONTEXT";
  39. case -35: return "CL_INVALID_QUEUE_PROPERTIES";
  40. case -36: return "CL_INVALID_COMMAND_QUEUE";
  41. case -37: return "CL_INVALID_HOST_PTR";
  42. case -38: return "CL_INVALID_MEM_OBJECT";
  43. case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
  44. case -40: return "CL_INVALID_IMAGE_SIZE";
  45. case -41: return "CL_INVALID_SAMPLER";
  46. case -42: return "CL_INVALID_BINARY";
  47. case -43: return "CL_INVALID_BUILD_OPTIONS";
  48. case -44: return "CL_INVALID_PROGRAM";
  49. case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
  50. case -46: return "CL_INVALID_KERNEL_NAME";
  51. case -47: return "CL_INVALID_KERNEL_DEFINITION";
  52. case -48: return "CL_INVALID_KERNEL";
  53. case -49: return "CL_INVALID_ARG_INDEX";
  54. case -50: return "CL_INVALID_ARG_VALUE";
  55. case -51: return "CL_INVALID_ARG_SIZE";
  56. case -52: return "CL_INVALID_KERNEL_ARGS";
  57. case -53: return "CL_INVALID_WORK_DIMENSION";
  58. case -54: return "CL_INVALID_WORK_GROUP_SIZE";
  59. case -55: return "CL_INVALID_WORK_ITEM_SIZE";
  60. case -56: return "CL_INVALID_GLOBAL_OFFSET";
  61. case -57: return "CL_INVALID_EVENT_WAIT_LIST";
  62. case -58: return "CL_INVALID_EVENT";
  63. case -59: return "CL_INVALID_OPERATION";
  64. case -60: return "CL_INVALID_GL_OBJECT";
  65. case -61: return "CL_INVALID_BUFFER_SIZE";
  66. case -62: return "CL_INVALID_MIP_LEVEL";
  67. case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
  68. case -64: return "CL_INVALID_PROPERTY";
  69. case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
  70. case -66: return "CL_INVALID_COMPILER_OPTIONS";
  71. case -67: return "CL_INVALID_LINKER_OPTIONS";
  72. case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
  73. // extension errors
  74. case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
  75. case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
  76. case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
  77. case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
  78. case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
  79. case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
  80. default: return "Unknown OpenCL error";
  81. }
  82. }
  83. void debug(int ret) {
  84. if (ret != CL_SUCCESS) {
  85. printf(clErrorString(ret));
  86. printf("\n");
  87. }
  88. }
  89. int main(void) {
  90. printf("start\n");
  91. // Create the two input vectors
  92. int i;
  93. const int LIST_SIZE = 1024;
  94. int *A = (int*)malloc(sizeof(int)*LIST_SIZE);
  95. int *B = (int*)malloc(sizeof(int)*LIST_SIZE);
  96. for(i = 0; i < LIST_SIZE; i++) {
  97. A[i] = i;
  98. B[i] = LIST_SIZE - i;
  99. }
  100. // Load the kernel source code into the array source_str
  101. FILE *fp;
  102. char *source_str;
  103. size_t source_size;
  104. fp = fopen("vecadd4.cl", "r");
  105. if (!fp) {
  106. fprintf(stderr, "Failed to load kernel.\n");
  107. exit(1);
  108. }
  109. source_str = (char*)malloc(MAX_SOURCE_SIZE);
  110. source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
  111. fclose( fp );
  112. // Get platform and device information
  113. cl_device_id device_id = NULL;
  114. cl_uint ret_num_devices;
  115. cl_uint ret_num_platforms;
  116. cl_int ret = clGetPlatformIDs(0, NULL, &ret_num_platforms);
  117. debug(ret);
  118. cl_platform_id *platforms = NULL;
  119. platforms = (cl_platform_id*)malloc(ret_num_platforms*sizeof(cl_platform_id));
  120. ret = clGetPlatformIDs(ret_num_platforms, platforms, NULL);
  121. printf("ret at clGetPlatformIDs (%d) is %d\n", __LINE__, ret);
  122. debug(ret);
  123. ret = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_ALL, 1, &device_id, &ret_num_devices);
  124. printf("ret at clGetDeviceIDs (%d) is %d\n", __LINE__, ret);
  125. debug(ret);
  126. // Create an OpenCL context
  127. cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
  128. printf("ret at clCreateContext (%d) is %d\n", __LINE__, ret);
  129. debug(ret);
  130. // Create a command queue
  131. cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
  132. printf("ret at clCreateCommandQueue (%d) is %d\n", __LINE__, ret);
  133. debug(ret);
  134. // Create memory buffers on the device for each vector
  135. cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int), NULL, &ret);
  136. cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int), NULL, &ret);
  137. cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, LIST_SIZE * sizeof(int), NULL, &ret);
  138. // Copy the lists A and B to their respective memory buffers
  139. ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, LIST_SIZE * sizeof(int), A, 0, NULL, NULL);
  140. printf("ret at clEnqueueWriteBuffer (%d) is %d\n", __LINE__, ret);
  141. debug(ret);
  142. ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, LIST_SIZE * sizeof(int), B, 0, NULL, NULL);
  143. printf("ret at clEnqueueWriteBuffer (%d) is %d\n", __LINE__, ret);
  144. debug(ret);
  145. // Create a program from the kernel source
  146. cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
  147. printf("ret at clCreateProgramWithSource (%d) is %d\n", __LINE__, ret);
  148. debug(ret);
  149. // Build the program
  150. ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
  151. printf("ret at clBuildProgram (%d) is %d\n", __LINE__, ret);
  152. if (ret != CL_SUCCESS) {
  153. printf(clErrorString(ret));
  154. printf("\n");
  155. cl_build_status status;
  156. char * log;
  157. size_t log_size;
  158. clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
  159. log = (char*)malloc(log_size+1);
  160. clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
  161. log[log_size-1]=0;
  162. printf(log);
  163. free(log);
  164. }
  165. // Create the OpenCL kernel
  166. // __kernel void sha256_crypt_kernel(__global uint *data_info,__global char *plain_key, __global uint *digest) {
  167. // cl_kernel kernel = clCreateKernel(program, "sha256_crypt_kernel", &ret);
  168. cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);
  169. printf("ret at clCreateKernel (%d) is %d\n", __LINE__, ret);
  170. debug(ret);
  171. // Set the arguments of the kernel
  172. ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
  173. printf("ret at clSetKernelArg (%d) is %d\n", __LINE__, ret);
  174. debug(ret);
  175. ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
  176. printf("ret at clSetKernelArg (%d) is %d\n", __LINE__, ret);
  177. debug(ret);
  178. ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
  179. printf("ret at clSetKernelArg (%d) is %d\n", __LINE__, ret);
  180. debug(ret);
  181. //added this to fix garbage output problem
  182. //ret = clSetKernelArg(kernel, 3, sizeof(int), &LIST_SIZE);
  183. // Execute the OpenCL kernel on the list
  184. size_t global_item_size = LIST_SIZE; // Process the entire lists
  185. size_t local_item_size = 8; // Divide work items into groups of 8 (12 ideally, but 1024 isn't)
  186. ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
  187. printf("ret at clEnqueueNDRangeKernel (%d) is %d\n", __LINE__, ret);
  188. if (ret!=CL_SUCCESS) {
  189. printf(clErrorString(ret));
  190. printf("\n");
  191. }
  192. // Read the memory buffer C on the device to the local variable C
  193. int *C = (int*)malloc(sizeof(int)*LIST_SIZE);
  194. ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, LIST_SIZE * sizeof(int), C, 0, NULL, NULL);
  195. printf("ret at clEnqueueReadBuffer (%d) is %d\n", __LINE__, ret);
  196. debug(ret);
  197. // Display the result to the screen
  198. for(i = 0; i < 8; i++)
  199. printf("%d + %d = %d\n", A[i], B[i], C[i]);
  200. // Clean up
  201. ret = clFlush(command_queue);
  202. debug(ret);
  203. ret = clFinish(command_queue);
  204. debug(ret);
  205. ret = clReleaseKernel(kernel);
  206. debug(ret);
  207. ret = clReleaseProgram(program);
  208. debug(ret);
  209. ret = clReleaseMemObject(a_mem_obj);
  210. debug(ret);
  211. ret = clReleaseMemObject(b_mem_obj);
  212. debug(ret);
  213. ret = clReleaseMemObject(c_mem_obj);
  214. debug(ret);
  215. ret = clReleaseCommandQueue(command_queue);
  216. debug(ret);
  217. ret = clReleaseContext(context);
  218. debug(ret);
  219. free(A);
  220. free(B);
  221. free(C);
  222. return 0;
  223. }