OpenCL で deviceQuery ぽいものを作ってみた
deviceQuery とは...
deviceQuery とは,GPGPU 向け SDK である CUDA に含まれている,デバイス情報を取得するサンプルコードです (CUDA Samples :: CUDA Toolkit Documentation).今回は OpenCL を使って,搭載されているデバイスの情報を取得するプログラム (deviceQuery ぽいもの) を作ります.
コンパイル方法
$ g++ deviceQuery.cpp -framework OpenCL
ソースコード
/**-------------------- deviceQuery.cpp --------------------*/ #include <iostream> #include <cstring> #ifdef __APPLE__ #include <OpenCL/opencl.h> #else #include <CL/cl.h> #endif //for platform information struct platform_info{ char info_name[1024]; cl_platform_info info_type; }; //for device information struct device_info{ char info_name[1024]; cl_device_info info_type; }; struct platform_info pinfo_list[] = { {"CL_PLATFORM_PROFILE", CL_PLATFORM_PROFILE}, {"CL_PLATFORM_VERSION", CL_PLATFORM_VERSION}, {"CL_PLATFORM_NAME", CL_PLATFORM_NAME}, {"CL_PLATFORM_VENDOR", CL_PLATFORM_VENDOR}, {"CL_PLATFORM_EXTENSIONS", CL_PLATFORM_EXTENSIONS} }; struct device_info dinfo_list[] = { {"CL_DEVICE_TYPE", CL_DEVICE_TYPE}, {"CL_DEVICE_VENDOR_ID", CL_DEVICE_VENDOR_ID}, {"CL_DEVICE_MAX_COMPUTE_UNITS", CL_DEVICE_MAX_COMPUTE_UNITS}, {"CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS", CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}, {"CL_DEVICE_MAX_WORK_ITEM_SIZES", CL_DEVICE_MAX_WORK_ITEM_SIZES}, {"CL_DEVICE_MAX_WORK_GROUP_SIZE", CL_DEVICE_MAX_WORK_GROUP_SIZE}, {"CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR", CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR}, {"CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT", CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT}, {"CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT}, {"CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG", CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG}, {"CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT", CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT}, {"CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE", CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE}, {"CL_DEVICE_MAX_CLOCK_FREQUENCY", CL_DEVICE_MAX_CLOCK_FREQUENCY}, {"CL_DEVICE_ADDRESS_BITS", CL_DEVICE_ADDRESS_BITS}, {"CL_DEVICE_MAX_MEM_ALLOC_SIZE", CL_DEVICE_MAX_MEM_ALLOC_SIZE}, {"CL_DEVICE_IMAGE_SUPPORT", CL_DEVICE_IMAGE_SUPPORT}, {"CL_DEVICE_MAX_READ_IMAGE_ARGS", CL_DEVICE_MAX_READ_IMAGE_ARGS}, {"CL_DEVICE_MAX_WRITE_IMAGE_ARGS", CL_DEVICE_MAX_WRITE_IMAGE_ARGS}, {"CL_DEVICE_IMAGE2D_MAX_WIDTH", CL_DEVICE_IMAGE2D_MAX_WIDTH}, {"CL_DEVICE_IMAGE2D_MAX_HEIGHT", 
CL_DEVICE_IMAGE2D_MAX_HEIGHT}, {"CL_DEVICE_IMAGE3D_MAX_WIDTH", CL_DEVICE_IMAGE3D_MAX_WIDTH}, {"CL_DEVICE_IMAGE3D_MAX_HEIGHT", CL_DEVICE_IMAGE3D_MAX_HEIGHT}, {"CL_DEVICE_IMAGE3D_MAX_DEPTH", CL_DEVICE_IMAGE3D_MAX_DEPTH}, {"CL_DEVICE_MAX_SAMPLERS", CL_DEVICE_MAX_SAMPLERS}, {"CL_DEVICE_MAX_PARAMETER_SIZE", CL_DEVICE_MAX_PARAMETER_SIZE}, {"CL_DEVICE_MEM_BASE_ADDR_ALIGN", CL_DEVICE_MEM_BASE_ADDR_ALIGN}, {"CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE", CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE}, {"CL_DEVICE_SINGLE_FP_CONFIG", CL_DEVICE_SINGLE_FP_CONFIG}, {"CL_DEVICE_GLOBAL_MEM_CACHE_TYPE", CL_DEVICE_GLOBAL_MEM_CACHE_TYPE}, {"CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE", CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE}, {"CL_DEVICE_GLOBAL_MEM_CACHE_SIZE", CL_DEVICE_GLOBAL_MEM_CACHE_SIZE}, {"CL_DEVICE_GLOBAL_MEM_SIZE", CL_DEVICE_GLOBAL_MEM_SIZE}, {"CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE", CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE}, {"CL_DEVICE_MAX_CONSTANT_ARGS", CL_DEVICE_MAX_CONSTANT_ARGS}, {"CL_DEVICE_LOCAL_MEM_TYPE", CL_DEVICE_LOCAL_MEM_TYPE}, {"CL_DEVICE_LOCAL_MEM_SIZE", CL_DEVICE_LOCAL_MEM_SIZE}, {"CL_DEVICE_ERROR_CORRECTION_SUPPORT", CL_DEVICE_ERROR_CORRECTION_SUPPORT}, {"CL_DEVICE_PROFILING_TIMER_RESOLUTION", CL_DEVICE_PROFILING_TIMER_RESOLUTION}, {"CL_DEVICE_ENDIAN_LITTLE", CL_DEVICE_ENDIAN_LITTLE}, {"CL_DEVICE_AVAILABLE", CL_DEVICE_AVAILABLE}, {"CL_DEVICE_EXECUTION_CAPABILITIES", CL_DEVICE_EXECUTION_CAPABILITIES}, {"CL_DEVICE_QUEUE_PROPERTIES", CL_DEVICE_QUEUE_PROPERTIES}, {"CL_DEVICE_PLATFORM", CL_DEVICE_PLATFORM}, {"CL_DEVICE_NAME", CL_DEVICE_NAME}, {"CL_DEVICE_VENDOR", CL_DEVICE_VENDOR}, {"CL_DEVICE_VERSION", CL_DEVICE_VERSION}, {"CL_DEVICE_PROFILE", CL_DEVICE_PROFILE}, {"CL_DEVICE_VERSION", CL_DEVICE_VERSION}, {"CL_DEVICE_EXTENSIONS", CL_DEVICE_EXTENSIONS} }; std::size_t const pinfo_size = sizeof(pinfo_list) / sizeof(pinfo_list[0]); std::size_t const dinfo_size = sizeof(dinfo_list) / sizeof(dinfo_list[0]); struct platform_data{ cl_uint size; cl_platform_id* platform; }; struct device_data{ cl_uint size; 
cl_device_id* device; }; platform_data* allocate_platform(void){ cl_int ret; cl_uint size; struct platform_data* pdata = NULL; do{ //Get: Number of Platforms ret = clGetPlatformIDs(0, NULL, &size); if(ret != CL_SUCCESS){ std::cerr << "Error - clGetPlatformIDs(" << ret << ")" << std::endl; break; } //Allocate memory pdata = (platform_data*)malloc(sizeof(platform_data)); if(pdata == NULL){ std::cerr << "Error - Can not allocate memory" << std::endl; break; } pdata->size = size; pdata->platform = (cl_platform_id*)malloc(sizeof(cl_platform_id) * size); if(pdata->platform == NULL){ std::cerr << "Error - Can not allocate memory" << std::endl; break; } //Get: Platform's information ret = clGetPlatformIDs(size, pdata->platform, &pdata->size); if(ret != CL_SUCCESS){ std::cerr << "Error - clGetPlatformIDs(" << ret << ")" << std::endl; break; } } while(0); return pdata; } void free_platform(platform_data* pdata){ if(pdata != NULL){ if(pdata->platform != NULL){ free(pdata->platform); pdata->platform = NULL; } free(pdata); pdata = NULL; } } device_data* allocate_device(cl_platform_id* platform_id){ cl_int ret; cl_uint size; struct device_data* ddata = NULL; do{ if(platform_id == NULL){ break; } //Get: Number of Devices ret = clGetDeviceIDs(platform_id[0], CL_DEVICE_TYPE_ALL, 0, NULL, &size); if(ret != CL_SUCCESS){ break; } //Allocate memory ddata = (device_data*)malloc(sizeof(device_data) * size); if(ddata == NULL){ break; } ddata->size = size; ddata->device = (cl_device_id*)malloc(sizeof(cl_device_id) * size); if(ddata->device == NULL){ break; } //Get: Device's information ret = clGetDeviceIDs(platform_id[0], CL_DEVICE_TYPE_ALL, size, ddata->device, &ddata->size); if(ret != CL_SUCCESS){ break; } } while(0); return ddata; } void free_device(device_data* ddata){ if(ddata != NULL){ if(ddata->device != NULL){ free(ddata->device); ddata->device = NULL; } free(ddata); ddata = NULL; } } void show_platform_id(cl_platform_id* platform_id){ std::size_t const buf_size = 1025; char 
info[buf_size]; cl_int ret; for(std::size_t i=0; i<pinfo_size; ++i){ memset(info, 0x00, sizeof(info)); ret = clGetPlatformInfo(platform_id[0], pinfo_list[i].info_type, sizeof(info), info, NULL); if(ret != CL_SUCCESS){ break; } std::cout << " " << pinfo_list[i].info_name << " = " << info << std::endl; } } void show_device_id(cl_device_id* device_id){ std::size_t const buf_size = 1025; char info[buf_size]; cl_int ret; for(std::size_t i=0; i<dinfo_size; ++i){ memset(info, 0x00, sizeof(info)); ret = clGetDeviceInfo(device_id[0], dinfo_list[i].info_type, sizeof(info), info, NULL); if(ret != CL_SUCCESS){ break; } std::cout << " " << dinfo_list[i].info_name << " = " << info << std::endl; } } int main(void){ platform_data* pdata; device_data* ddata; do{ pdata = allocate_platform(); if(pdata == NULL){ break; } std::cout << "Number of Platforms = " << pdata->size << std::endl; for(cl_uint i=0; i<pdata->size; ++i){ std::cout << "Platform :" << i << std::endl; show_platform_id(&pdata->platform[i]); ddata = allocate_device(&pdata->platform[i]); if(ddata == NULL){ break; } std::cout << " Number of Devices = " << ddata->size << std::endl; for(cl_uint j=0; j<ddata->size; ++j){ std::cout << "------------------------------" << std::endl; std::cout << " Device: " << j << std::endl; show_device_id(&ddata->device[j]); } free_device(ddata); } } while(0); free_platform(pdata); return 0; }
実行結果
Number of Platforms = 1 Platform :0 CL_PLATFORM_PROFILE = FULL_PROFILE CL_PLATFORM_VERSION = OpenCL 1.2 (Dec 4 2012 18:26:30) CL_PLATFORM_NAME = Apple CL_PLATFORM_VENDOR = Apple CL_PLATFORM_EXTENSIONS = cl_APPLE_SetMemObjectDestructor cl_APPLE_ContextLoggingFunctions cl_APPLE_clut cl_APPLE_query_kernel_names cl_APPLE_gl_sharing cl_khr_gl_event Number of Devices = 1 ------------------------------ Device: 0 CL_DEVICE_TYPE = CL_DEVICE_VENDOR_ID = ???? CL_DEVICE_MAX_COMPUTE_UNITS = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = CL_DEVICE_MAX_WORK_ITEM_SIZES = CL_DEVICE_MAX_WORK_GROUP_SIZE = CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR = CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG = CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE = CL_DEVICE_MAX_CLOCK_FREQUENCY = ? CL_DEVICE_ADDRESS_BITS = @ CL_DEVICE_MAX_MEM_ALLOC_SIZE = CL_DEVICE_IMAGE_SUPPORT = CL_DEVICE_MAX_READ_IMAGE_ARGS = ? CL_DEVICE_MAX_WRITE_IMAGE_ARGS = CL_DEVICE_IMAGE2D_MAX_WIDTH = CL_DEVICE_IMAGE2D_MAX_HEIGHT = CL_DEVICE_IMAGE3D_MAX_WIDTH = CL_DEVICE_IMAGE3D_MAX_HEIGHT = CL_DEVICE_IMAGE3D_MAX_DEPTH = CL_DEVICE_MAX_SAMPLERS = CL_DEVICE_MAX_PARAMETER_SIZE = CL_DEVICE_MEM_BASE_ADDR_ALIGN = CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE = ? CL_DEVICE_SINGLE_FP_CONFIG = ? 
CL_DEVICE_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE = @ CL_DEVICE_GLOBAL_MEM_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_ARGS = CL_DEVICE_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_SIZE = CL_DEVICE_ERROR_CORRECTION_SUPPORT = CL_DEVICE_PROFILING_TIMER_RESOLUTION = CL_DEVICE_ENDIAN_LITTLE = CL_DEVICE_AVAILABLE = CL_DEVICE_EXECUTION_CAPABILITIES = CL_DEVICE_QUEUE_PROPERTIES = CL_DEVICE_PLATFORM = CL_DEVICE_NAME = Intel(R) Core(TM) i7-2620M CPU @ 2.70GHz CL_DEVICE_VENDOR = Intel CL_DEVICE_VERSION = OpenCL 1.2 CL_DEVICE_PROFILE = FULL_PROFILE CL_DEVICE_VERSION = OpenCL 1.2 CL_DEVICE_EXTENSIONS = cl_APPLE_SetMemObjectDestructor cl_APPLE_ContextLoggingFunctions cl_APPLE_clut cl_APPLE_query_kernel_names cl_APPLE_gl_sharing cl_khr_gl_event cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_byte_addressable_store cl_khr_int64_base_atomics cl_khr_int64_extended_atomics cl_khr_3d_image_writes cl_APPLE_fp64_basic_ops cl_APPLE_fixed_alpha_channel_orders cl_APPLE_biased_fixed_point_image_formats
所感
- プラットフォーム = PC,デバイス = 計算ユニットぽい
- よく本だと clGetPlatformIDs の第1引数を 1 にしているが,2以上指定するケースは分散計算させる場合のみ(?)
- CPU も”デバイス”として認識される (計算機ユニット的な扱い)
- CPU のデバイス情報は取得できないものが多い(コードのバグ?)
- CPU に搭載されている GPU はデバイスとして認識できない & 使用できない
追記
CPU に搭載されている GPU (HD 4000等) は Windows 7,Windows 8 環境ならば確認できるそうです.@Vengineer さん,情報提供ありがとうございました orz
途中でエラー処理が面倒になった.反省はしているorz OpenCL で deviceQuery ぽいものを作ってみた - kawa0810の日記 (id:kawa0810 / @kawa0810) URL