Device Query

~/samples/NVIDIA_CUDA-9.1_Samples/1_Utilities/deviceQuery$ make

/usr/local/cuda-9.1/bin/nvcc -ccbin g++ -I../../common/inc  -m64    -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_70,code=compute_70 -o deviceQuery.o -c deviceQuery.cpp

/usr/local/cuda-9.1/bin/nvcc -ccbin g++  -m64      -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_70,code=compute_70 -o deviceQuery deviceQuery.o

mkdir -p ../../bin/x86_64/linux/release

cp deviceQuery ../../bin/x86_64/linux/release
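Each "-gencode arch=compute_XX,code=sm_XX" pair above embeds native machine code (SASS) for one GPU generation, and the final "arch=compute_70,code=compute_70" entry embeds PTX as well, so GPUs newer than the toolkit can JIT-compile it at load time. If you only target one known GPU (the GTX 1050 queried below has compute capability 6.1), a single -gencode entry builds faster. As a minimal sketch to verify the toolchain (the file name and single-arch flags here are illustrative, not part of the samples tree):

// minimal.cu - check that nvcc can build and run code for your GPU.
// Build for one architecture only (compute capability 6.1 = GTX 1050):
//   nvcc -m64 -gencode arch=compute_61,code=sm_61 -o minimal minimal.cu
#include <cstdio>
#include <cuda_runtime.h>

__global__ void hello()
{
    printf("Hello from block %d, thread %d\n", blockIdx.x, threadIdx.x);
}

int main()
{
    hello<<<1, 4>>>();        // one block of four threads
    cudaDeviceSynchronize();  // wait for the kernel so its printf is flushed
    return 0;
}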

~/samples/NVIDIA_CUDA-9.1_Samples/1_Utilities/deviceQuery$ ls

deviceQuery  deviceQuery.cpp  deviceQuery.o  Makefile  NsightEclipse.xml  readme.txt

~/samples/NVIDIA_CUDA-9.1_Samples/1_Utilities/deviceQuery$ ./deviceQuery

./deviceQuery Starting...

CUDA Device Query (Runtime API) version (CUDART static linking)

Detected 1 CUDA Capable device(s)

Device 0: "GeForce GTX 1050"

  CUDA Driver Version / Runtime Version          10.1 / 9.1

  CUDA Capability Major/Minor version number:    6.1

  Total amount of global memory:                1998 MBytes (2095382528 bytes)

  ( 5) Multiprocessors, (128) CUDA Cores/MP:    640 CUDA Cores

  GPU Max Clock rate:                            1455 MHz (1.46 GHz)

  Memory Clock rate:                            3504 MHz

  Memory Bus Width:                              128-bit

  L2 Cache Size:                                1048576 bytes

  Maximum Texture Dimension Size (x,y,z)        1D=(131072), 2D=(131072, 65536), 3D=(16384, 16384, 16384)

  Maximum Layered 1D Texture Size, (num) layers  1D=(32768), 2048 layers

  Maximum Layered 2D Texture Size, (num) layers  2D=(32768, 32768), 2048 layers

  Total amount of constant memory:              65536 bytes

  Total amount of shared memory per block:      49152 bytes

  Total number of registers available per block: 65536

  Warp size:                                    32

  Maximum number of threads per multiprocessor:  2048

  Maximum number of threads per block:          1024

  Max dimension size of a thread block (x,y,z): (1024, 1024, 64)

  Max dimension size of a grid size    (x,y,z): (2147483647, 65535, 65535)

  Maximum memory pitch:                          2147483647 bytes

  Texture alignment:                            512 bytes

  Concurrent copy and kernel execution:          Yes with 2 copy engine(s)

  Run time limit on kernels:                    No

  Integrated GPU sharing Host Memory:            No

  Support host page-locked memory mapping:      Yes

  Alignment requirement for Surfaces:            Yes

  Device has ECC support:                        Disabled

  Device supports Unified Addressing (UVA):      Yes

  Supports Cooperative Kernel Launch:            Yes

  Supports MultiDevice Co-op Kernel Launch:      Yes

  Device PCI Domain ID / Bus ID / location ID:  0 / 2 / 0

  Compute Mode:

    < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >

deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 10.1, CUDA Runtime Version = 9.1, NumDevs = 1

Result = PASS
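Three details in this output are worth spelling out. The driver version (10.1) only needs to be at least as new as the runtime (9.1), so this mismatch is normal. Compute capability 6.1 identifies a Pascal part, i.e. the compute_61/sm_61 targets from the build step. And the core count is derived: 5 multiprocessors times 128 CUDA cores per multiprocessor = 640 cores. Real programs typically read these properties at run time rather than hard-coding them; a minimal sketch of that pattern (the file and kernel names are illustrative):

// launchsize.cu - size a kernel launch from queried device properties.
//   nvcc -gencode arch=compute_61,code=sm_61 -o launchsize launchsize.cu
#include <cstdio>
#include <cuda_runtime.h>

__global__ void fill(float *out, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        out[i] = 1.0f;
}

int main()
{
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, 0);    // device 0, as in the output above

    int n = 1 << 20;
    int block = prop.maxThreadsPerBlock;  // 1024 on this GTX 1050
    int grid = (n + block - 1) / block;   // round up so all n elements are covered

    float *d = NULL;
    cudaMalloc(&d, n * sizeof(float));
    fill<<<grid, block>>>(d, n);
    cudaDeviceSynchronize();
    cudaFree(d);

    printf("launched %d blocks of %d threads\n", grid, block);
    return 0;
}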




~/samples/NVIDIA_CUDA-9.1_Samples/1_Utilities/deviceQuery$ cat deviceQuery.cpp

/*

* Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.

*

* Please refer to the NVIDIA end user license agreement (EULA) associated

* with this source code for terms and conditions that govern your use of

* this software. Any use, reproduction, disclosure, or distribution of

* this software and related documentation outside the terms of the EULA

* is strictly prohibited.

*

*/

/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */

// Shared Utilities (QA Testing)

// std::system includes

#include <memory>

#include <iostream>

#include <string>   // std::string is used directly below; don't rely on transitive includes

#include <cuda_runtime.h>

#include <helper_cuda.h>

int *pArgc = NULL;

char **pArgv = NULL;

#if CUDART_VERSION < 5000

// CUDA-C includes

#include <cuda.h>

// This function wraps the CUDA Driver API into a template function

template <class T>

inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)

{

    CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device);

    if (CUDA_SUCCESS != error)

    {

        fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",

                error, __FILE__, __LINE__);

        exit(EXIT_FAILURE);

    }

}

#endif /* CUDART_VERSION < 5000 */

////////////////////////////////////////////////////////////////////////////////

// Program main

////////////////////////////////////////////////////////////////////////////////

int

main(int argc, char **argv)

{

    pArgc = &argc;

    pArgv = argv;

    printf("%s Starting...\n\n", argv[0]);

    printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");

    int deviceCount = 0;

    cudaError_t error_id = cudaGetDeviceCount(&deviceCount);

    if (error_id != cudaSuccess)

    {

        printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id, cudaGetErrorString(error_id));

        printf("Result = FAIL\n");

        exit(EXIT_FAILURE);

    }

    // This function call returns 0 if there are no CUDA capable devices.

    if (deviceCount == 0)

    {

        printf("There are no available device(s) that support CUDA\n");

    }

    else

    {

        printf("Detected %d CUDA Capable device(s)\n", deviceCount);

    }

    int dev, driverVersion = 0, runtimeVersion = 0;

    for (dev = 0; dev < deviceCount; ++dev)

    {

        cudaSetDevice(dev);

        cudaDeviceProp deviceProp;

        cudaGetDeviceProperties(&deviceProp, dev);

        printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);

        // Console log

        cudaDriverGetVersion(&driverVersion);

        cudaRuntimeGetVersion(&runtimeVersion);
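        // Both calls encode the version as 1000*major + 10*minor, so 10010 prints below as 10.1.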

        printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);

        printf("  CUDA Capability Major/Minor version number:    %d.%d\n", deviceProp.major, deviceProp.minor);

        char msg[256];
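        // SPRINTF is a macro defined in the samples' helper_string.h (pulled in via helper_cuda.h): sprintf_s on Windows, plain sprintf elsewhere.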

        SPRINTF(msg, "  Total amount of global memory:                %.0f MBytes (%llu bytes)\n",

                (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);

        printf("%s", msg);

        printf("  (%2d) Multiprocessors, (%3d) CUDA Cores/MP:    %d CUDA Cores\n",

              deviceProp.multiProcessorCount,

              _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),

              _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);

        printf("  GPU Max Clock rate:                            %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);

#if CUDART_VERSION >= 5000

        // This is supported in CUDA 5.0 (runtime API device properties)

        printf("  Memory Clock rate:                            %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);

        printf("  Memory Bus Width:                              %d-bit\n",  deviceProp.memoryBusWidth);

        if (deviceProp.l2CacheSize)

        {

            printf("  L2 Cache Size:                                %d bytes\n", deviceProp.l2CacheSize);

        }

#else

        // This is only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)

        int memoryClock;

        getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);

        printf("  Memory Clock rate:                            %.0f Mhz\n", memoryClock * 1e-3f);

        int memBusWidth;

        getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);

        printf("  Memory Bus Width:                              %d-bit\n", memBusWidth);

        int L2CacheSize;

        getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);

        if (L2CacheSize)

        {

            printf("  L2 Cache Size:                                %d bytes\n", L2CacheSize);

        }

#endif

        printf("  Maximum Texture Dimension Size (x,y,z)        1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",

              deviceProp.maxTexture1D  , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],

              deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);

        printf("  Maximum Layered 1D Texture Size, (num) layers  1D=(%d), %d layers\n",

              deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);

        printf("  Maximum Layered 2D Texture Size, (num) layers  2D=(%d, %d), %d layers\n",

              deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);

        printf("  Total amount of constant memory:              %lu bytes\n", deviceProp.totalConstMem);

        printf("  Total amount of shared memory per block:      %lu bytes\n", deviceProp.sharedMemPerBlock);

        printf("  Total number of registers available per block: %d\n", deviceProp.regsPerBlock);

        printf("  Warp size:                                    %d\n", deviceProp.warpSize);

        printf("  Maximum number of threads per multiprocessor:  %d\n", deviceProp.maxThreadsPerMultiProcessor);

        printf("  Maximum number of threads per block:          %d\n", deviceProp.maxThreadsPerBlock);

        printf("  Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",

              deviceProp.maxThreadsDim[0],

              deviceProp.maxThreadsDim[1],

              deviceProp.maxThreadsDim[2]);

        printf("  Max dimension size of a grid size    (x,y,z): (%d, %d, %d)\n",

              deviceProp.maxGridSize[0],

              deviceProp.maxGridSize[1],

              deviceProp.maxGridSize[2]);

        printf("  Maximum memory pitch:                          %lu bytes\n", deviceProp.memPitch);

        printf("  Texture alignment:                            %lu bytes\n", deviceProp.textureAlignment);

        printf("  Concurrent copy and kernel execution:          %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);

        printf("  Run time limit on kernels:                    %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");

        printf("  Integrated GPU sharing Host Memory:            %s\n", deviceProp.integrated ? "Yes" : "No");

        printf("  Support host page-locked memory mapping:      %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");

        printf("  Alignment requirement for Surfaces:            %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");

        printf("  Device has ECC support:                        %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

        printf("  CUDA Device Driver Mode (TCC or WDDM):        %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");

#endif

        printf("  Device supports Unified Addressing (UVA):      %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");

        printf("  Supports Cooperative Kernel Launch:            %s\n", deviceProp.cooperativeLaunch ? "Yes" : "No");

        printf("  Supports MultiDevice Co-op Kernel Launch:      %s\n", deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No");

        printf("  Device PCI Domain ID / Bus ID / location ID:  %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);

        const char *sComputeMode[] =

        {

            "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",

            "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",

            "Prohibited (no host thread can use ::cudaSetDevice() with this device)",

            "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",

            "Unknown",

            NULL

        };
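        // deviceProp.computeMode is a cudaComputeMode enum (values 0-3), used directly as an index into this table.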

        printf("  Compute Mode:\n");

        printf("    < %s >\n", sComputeMode[deviceProp.computeMode]);

    }

    // If there are 2 or more GPUs, query to determine whether peer-to-peer (P2P) access is supported

    if (deviceCount >= 2)

    {

        cudaDeviceProp prop[64];

        int gpuid[64]; // we want to find the first two GPUs that can support P2P

        int gpu_p2p_count = 0;

        for (int i=0; i < deviceCount; i++)

        {

            checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));

            // Only boards based on Fermi or later can support P2P

            if ((prop[i].major >= 2)

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

                // on Windows (64-bit), the Tesla Compute Cluster driver for Windows must be enabled to support this

                && prop[i].tccDriver

#endif

              )

            {

                // This is an array of P2P capable GPUs

                gpuid[gpu_p2p_count++] = i;

            }

        }

        // Show all the combinations of P2P-capable GPUs
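        // (A "Yes" below means the pair could then be linked at run time with cudaDeviceEnablePeerAccess().)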

        int can_access_peer;

        if (gpu_p2p_count >= 2)

        {

            for (int i = 0; i < gpu_p2p_count; i++)

            {

                for (int j = 0; j < gpu_p2p_count; j++)

                {

                    if (gpuid[i] == gpuid[j])

                    {

                        continue;

                    }

                    checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer, gpuid[i], gpuid[j]));

                        printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],

                          prop[gpuid[j]].name, gpuid[j] ,

                          can_access_peer ? "Yes" : "No");

                }

            }

        }

    }

    // csv masterlog info

    // *****************************

    // exe and CUDA driver name

    printf("\n");

    std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";

    char cTemp[16];

    // driver version

    sProfileString += ", CUDA Driver Version = ";

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

    sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);

#else

    sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);

#endif

    sProfileString +=  cTemp;

    // Runtime version

    sProfileString += ", CUDA Runtime Version = ";

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

    sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);

#else

    sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);

#endif

    sProfileString +=  cTemp;

    // Device count

    sProfileString += ", NumDevs = ";

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

    sprintf_s(cTemp, 10, "%d", deviceCount);

#else

    sprintf(cTemp, "%d", deviceCount);

#endif

    sProfileString += cTemp;

    sProfileString += "\n";

    printf("%s", sProfileString.c_str());

    printf("Result = PASS\n");

    // finish

    exit(EXIT_SUCCESS);

}
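Note that the sample leans on helper_cuda.h from the samples tree for checkCudaErrors() and _ConvertSMVer2Cores(). On a machine without the samples installed, the core of the query needs nothing beyond the runtime API; a minimal standalone sketch (the file name is illustrative):

// miniquery.cu - standalone device query using only the CUDA runtime API.
//   nvcc -o miniquery miniquery.cu
#include <cstdio>
#include <cuda_runtime.h>

int main()
{
    int count = 0;
    cudaError_t err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
        return 1;
    }
    for (int dev = 0; dev < count; ++dev)
    {
        cudaDeviceProp p;
        cudaGetDeviceProperties(&p, dev);
        printf("Device %d: \"%s\", compute capability %d.%d, %zu MBytes of global memory\n",
               dev, p.name, p.major, p.minor, p.totalGlobalMem / (1024 * 1024));
    }
    return 0;
}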
