通过CUDA的NVML库获取显卡的使用率-针对NVIDIA显卡
程序员文章站
2022-03-08 15:25:36
在使用CUDA的NVML库之前,需要安装CUDA SDK.需要确定文件夹“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include”下包含nvml.h文件,并将该地址包含在工程中。需要确定文件夹“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include下”包含nvml.lib文件,并将该地址包含在工程中。将文件夹“C:\Program Files\NVIDIA C...
在使用CUDA的NVML库之前,需要安装CUDA SDK.
需要确定文件夹“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include”下包含nvml.h文件,并将该地址包含在工程中。
需要确定文件夹“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\lib\x64”下包含nvml.lib文件,并将该地址包含在工程中。
将文件夹“C:\Program Files\NVIDIA Corporation\NVSMI”下的nvml.dll拷贝到工程目录下。
在VS2010上编写的代码如下:
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include <cstdlib>
#include <ctime>
#include <string.h>
#include "cuda_runtime.h"
#include <windows.h>
#include "nvml.h"
int main(int argc, char* argv[])
{
nvmlReturn_t result0;
unsigned int device_count;
result0 = nvmlInit();
result0 = nvmlDeviceGetCount(&device_count);
if (NVML_SUCCESS != result0)
{
std::cout << "Failed to query device count: " << nvmlErrorString(result0);
return -1;
}
std::cout << "Found" << device_count <<" device" << std::endl;
std::stringstream availGPUTxt;
for (int i=0; i<device_count; i++)
{
nvmlDevice_t device;
nvmlPciInfo_t pci;
result0 = nvmlDeviceGetHandleByIndex(i, &device);
if (NVML_SUCCESS != result0)
{
std::cout << "Failed to get device count: " << nvmlErrorString(result0);
return -1;
}
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
result0 = nvmlDeviceGetName(device, name, NVML_DEVICE_NAME_BUFFER_SIZE);
if (NVML_SUCCESS != result0)
{
std::cout << "Failed to get device name: " << nvmlErrorString(result0);
return -1;
}
nvmlUtilization_t utilization;
result0 = nvmlDeviceGetUtilizationRates(device, &utilization);
if (NVML_SUCCESS != result0)
{
std::cout << "Failed to get utilization rates: " << nvmlErrorString(result0);
return -1;
}
nvmlMemory_t memory;
result0 = nvmlDeviceGetMemoryInfo(device, &memory);
if (NVML_SUCCESS != result0)
{
std::cout << "Failed to get memory info: " << nvmlErrorString(result0);
return -1;
}
availGPUTxt << "-----------------------------------------------------------" << std::endl;
availGPUTxt << "CUDA NVML" << std::endl;
availGPUTxt << "第" << i << "块显卡" << std::endl;
availGPUTxt << "GPU name:" << name << std::endl;
availGPUTxt << "----- 使用率: -----" << std::endl;
availGPUTxt << "GPU使用率:" << utilization.gpu << std::endl;
availGPUTxt << "显存使用率:" << utilization.memory << std::endl;
// Byte->KB->MB->GB
availGPUTxt << "全部可用显存:" << (float)(memory.total)/1024.0f/1024.0f/1024.0f << "GB" << std::endl;
availGPUTxt << "剩余可用显存:" << (float)(memory.free)/1024.0f/1024.0f/1024.0f << "GB" << std::endl;
availGPUTxt << "-----------------------------------------------------------" << std::endl;
}
std::string fileFitBetaName = "E:\\test\\availGPUTest.txt";
std::ofstream fileFitBetaInfo(fileFitBetaName, std::ios::out|std::ios::binary|std::ios::ate);
fileFitBetaInfo.write((char*)availGPUTxt.str().c_str(), (int)availGPUTxt.str().length());
fileFitBetaInfo.close();
return 0;
}
在本地输出的结果为:
在以上代码中,使用NVML的nvmlDeviceGetUtilizationRates函数获得显卡的使用率,单位是百分比。此外,还可以使用GPU-Z查看显卡的使用。其他NVML的显卡信息查看函数可以查看NVML查看显卡信息或者直接查看官方文档。
上面的程序使用nvmlDeviceGetMemoryInfo函数获得显卡的全部可用内存和剩余可用内存。获得显卡的可用内存也可以使用CUDA Runtime API的cudaMemGetInfo函数。代码如下:
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include <cstdlib>
#include <ctime>
#include <string.h>
#include "cuda_runtime.h"
#include <windows.h>
int main(int argc, char* argv[])
{
int id = -1;
cudaGetDevice(&id);
printf("cuda id=%d\n", id);
unsigned int flags;
cudaGetDeviceFlags(&flags);
int count;
if(cudaGetDeviceCount(&count)|| count < 1)
{
printf("cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
return -3;
}
std::stringstream availGPUTxt;
for (int i=0; i<count; i++)
{
cudaSetDevice(i);
//cudaDeviceReset();
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, i);
cudaDeviceAttr attr;
int value;
cudaDeviceGetAttribute(&value, cudaDevAttrMaxTextureCubemapLayeredLayers, i);
size_t szmen = 1024*1024*360;
float* ptrDevice = NULL;
if (i==0)
cudaMalloc((void**)&ptrDevice, szmen);
size_t available, total;
cudaMemGetInfo(&available, &total);
availGPUTxt << "-----------------------------------------------------------" << std::endl;
availGPUTxt << "CUDA Runtime API" << std::endl;
availGPUTxt << "第" << i << "块显卡" << std::endl;
// Byte->KB->MB->GB
availGPUTxt <<"GPU 全部可用显存:"<< (float)(total)/1024.0f/1024.0f/1024.0f << "GB" << std::endl;
availGPUTxt <<"GPU 剩余可用显存:"<< (float)(available)/1024.0f/1024.0f/1024.0f << "GB" << std::endl;
availGPUTxt << "-----------------------------------------------------------" << std::endl;
}
std::string fileFitBetaName = "E:\\test\\availGPUTest.txt";
std::ofstream fileFitBetaInfo(fileFitBetaName, std::ios::out|std::ios::binary|std::ios::ate);
fileFitBetaInfo.write((char*)availGPUTxt.str().c_str(), (int)availGPUTxt.str().length());
fileFitBetaInfo.close();
return 0;
}
在本地的输出结果为:
本文地址:https://blog.csdn.net/dsn0606/article/details/109626821
下一篇: 愚人节都过了,就不要开玩笑了