Compiling Cuda code in Qt Creator on Windows

后端 未结 3 1229
终归单人心
终归单人心 2020-11-29 02:03

I have been trying for days to get a Qt project file running on a 32-bit Windows 7 system, in which I want/need to include Cuda code. This combination of things is either so

相关标签:
3条回答
  • 2020-11-29 02:54

    Using msvc 2010 I found that the linker does not accept the -l parameter, however nvcc needs it. Therefore I made a simple change in the .pro file:

    # Add the necessary libraries (bare names, without prefix or extension)
    CUDA_LIBS = cuda cudart
    # The following makes sure all path names (which often include spaces) are put between quotation marks:
    # every INCLUDEPATH entry becomes -I"<path>"
    CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
    # LIBRARIES IN FORMAT NEEDED BY NVCC: each name prefixed with -l (-lcuda -lcudart)
    NVCC_LIBS = $$join(CUDA_LIBS,' -l','-l', '')
    # LIBRARIES IN FORMAT NEEDED BY VISUAL C++ LINKER: each name suffixed with .lib (cuda.lib cudart.lib)
    LIBS += $$join(CUDA_LIBS,'.lib ', '', '.lib')
    

    And the nvcc command (release version):

    # Release-mode nvcc invocation: the libraries are passed as $$NVCC_LIBS (the -l form) rather than $$LIBS
    cuda.commands = $$CUDA_DIR/bin/nvcc.exe $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
    

    $$NVCC_LIBS was inserted instead of $$LIBS. The whole .pro file, which works for me:

    # qmake project file for a Qt console application that links one CUDA
    # source file. The .cu file is compiled by nvcc through an extra compiler
    # (QMAKE_EXTRA_COMPILERS); the resulting object is linked by the normal
    # Visual C++ toolchain.
    QT       += core
    
    QT       -= gui
    
    TARGET = TestCUDA
    CONFIG   += console
    CONFIG   -= app_bundle
    
    TEMPLATE = app
    
    # Define output directories
    # NOTE: these directories are used for debug builds as well.
    DESTDIR = release
    OBJECTS_DIR = release/obj
    CUDA_OBJECTS_DIR = release/cuda
    
    # Source files
    SOURCES += main.cpp
    
    # This makes the .cu files appear in your project
    OTHER_FILES +=  vectorAddition.cu
    
    # CUDA settings <-- may change depending on your system
    CUDA_SOURCES += vectorAddition.cu
    #CUDA_SDK = "C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.2/C"   # Path to cuda SDK install
    CUDA_DIR = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v5.0"           # Path to cuda toolkit install
    SYSTEM_NAME = win32         # Depending on your system either 'Win32', 'x64', or 'Win64'
    SYSTEM_TYPE = 32            # '32' or '64', depending on your system
    CUDA_ARCH = sm_11           # Type of CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'
    NVCC_OPTIONS = --use_fast_math
    
    # include paths
    INCLUDEPATH += $$CUDA_DIR/include
                   #$$CUDA_SDK/common/inc/ \
                   #$$CUDA_SDK/../shared/inc/
    
    # library directories
    QMAKE_LIBDIR += $$CUDA_DIR/lib/$$SYSTEM_NAME
                    #$$CUDA_SDK/common/lib/$$SYSTEM_NAME \
                    #$$CUDA_SDK/../shared/lib/$$SYSTEM_NAME
    
    
    # The following library conflicts with something in Cuda.
    # Append (+=) rather than assign (=) so that the linker flags already set
    # by the mkspec for each configuration are kept.
    QMAKE_LFLAGS_RELEASE += /NODEFAULTLIB:msvcrt.lib
    QMAKE_LFLAGS_DEBUG   += /NODEFAULTLIB:msvcrtd.lib
    
    # Add the necessary libraries (bare names, without prefix or extension)
    CUDA_LIBS = cuda cudart
    # The following makes sure all path names (which often include spaces) are put between quotation marks
    CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
    # Libraries in the format needed by nvcc (-lcuda -lcudart)
    NVCC_LIBS = $$join(CUDA_LIBS,' -l','-l', '')
    # Libraries in the format needed by the Visual C++ linker (cuda.lib cudart.lib)
    LIBS += $$join(CUDA_LIBS,'.lib ', '', '.lib')
    
    # Configuration of the Cuda compiler: one extra compiler per build mode,
    # each turning every entry of CUDA_SOURCES into <base>_cuda.o
    CONFIG(debug, debug|release) {
        # Debug mode (adds -D_DEBUG)
        cuda_d.input = CUDA_SOURCES
        cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
        cuda_d.commands = $$CUDA_DIR/bin/nvcc.exe -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
        cuda_d.dependency_type = TYPE_C
        QMAKE_EXTRA_COMPILERS += cuda_d
    }
    else {
        # Release mode
        cuda.input = CUDA_SOURCES
        cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
        cuda.commands = $$CUDA_DIR/bin/nvcc.exe $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
        cuda.dependency_type = TYPE_C
        QMAKE_EXTRA_COMPILERS += cuda
    }
    

    I also added some essential declarations, i.e. QT += core for the app to work, and also removed the SDK part, which I did not find useful in this case.

    0 讨论(0)
  • 2020-11-29 02:55

    So I finally managed to assemble a .pro file that works on my system and probably on all Windows systems. The following is a small project file plus a simple test programme that should do the trick; it works at least on my system.

    The file system looks as follows:

    TestCUDA \
        TestCUDA.pro
        main.cpp
        vectorAddition.cu
    

    The project file reads:

    # qmake project file for a small Qt application that links one CUDA source
    # file. The .cu file is compiled by nvcc through an extra compiler
    # (QMAKE_EXTRA_COMPILERS). All paths are relative to the directory that
    # contains this .pro file, next to main.cpp and vectorAddition.cu.
    TARGET = TestCUDA
    
    # Define output directories
    # NOTE: these directories are used for debug builds as well.
    DESTDIR = release
    OBJECTS_DIR = release/obj
    CUDA_OBJECTS_DIR = release/cuda
    
    # Source files
    SOURCES += main.cpp
    
    # This makes the .cu files appear in your project
    OTHER_FILES +=  vectorAddition.cu
    
    # CUDA settings <-- may change depending on your system
    CUDA_SOURCES += vectorAddition.cu
    CUDA_SDK = "C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.2/C"   # Path to cuda SDK install
    CUDA_DIR = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v4.2"            # Path to cuda toolkit install
    SYSTEM_NAME = Win32         # Depending on your system either 'Win32', 'x64', or 'Win64'
    SYSTEM_TYPE = 32            # '32' or '64', depending on your system
    CUDA_ARCH = sm_11           # Type of CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'
    NVCC_OPTIONS = --use_fast_math
    
    # include paths
    INCLUDEPATH += $$CUDA_DIR/include \
                   $$CUDA_SDK/common/inc/ \
                   $$CUDA_SDK/../shared/inc/
    
    # library directories
    QMAKE_LIBDIR += $$CUDA_DIR/lib/$$SYSTEM_NAME \
                    $$CUDA_SDK/common/lib/$$SYSTEM_NAME \
                    $$CUDA_SDK/../shared/lib/$$SYSTEM_NAME
    # Add the necessary libraries
    LIBS += -lcuda -lcudart
    
    # The following library conflicts with something in Cuda.
    # Append (+=) rather than assign (=) so that the linker flags already set
    # by the mkspec for each configuration are kept.
    QMAKE_LFLAGS_RELEASE += /NODEFAULTLIB:msvcrt.lib
    QMAKE_LFLAGS_DEBUG   += /NODEFAULTLIB:msvcrtd.lib
    
    # The following makes sure all path names (which often include spaces) are put between quotation marks:
    # every INCLUDEPATH entry becomes -I"<path>"
    CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
    
    # Configuration of the Cuda compiler: one extra compiler per build mode,
    # each turning every entry of CUDA_SOURCES into <base>_cuda.o
    CONFIG(debug, debug|release) {
        # Debug mode (adds -D_DEBUG)
        cuda_d.input = CUDA_SOURCES
        cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
        cuda_d.commands = $$CUDA_DIR/bin/nvcc.exe -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
        cuda_d.dependency_type = TYPE_C
        QMAKE_EXTRA_COMPILERS += cuda_d
    }
    else {
        # Release mode
        cuda.input = CUDA_SOURCES
        cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
        cuda.commands = $$CUDA_DIR/bin/nvcc.exe $$NVCC_OPTIONS $$CUDA_INC $$LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
        cuda.dependency_type = TYPE_C
        QMAKE_EXTRA_COMPILERS += cuda
    }
    

    Note the QMAKE_LFLAGS_RELEASE = /NODEFAULTLIB:msvcrt.lib: it took me a long time to figure out, but this library seems to clash with other things in Cuda, which produces strange linking warnings and errors. If someone has an explanation for this, and potentially a prettier way to get around this, I'd like to hear it.

    Also, since Windows file paths often include spaces (and NVIDIA's SDK by default does so too), it is necessary to artificially add quotation marks around the include paths. Again, if someone knows a more elegant way of solving this problem, I'd be interested to know.

    The main.cpp file looks like this:

    #include <cuda.h>
    #include <builtin_types.h>
    #include <drvapi_error_string.h>
    
    #include <QtCore/QCoreApplication>
    #include <QDebug>
    
    // Forward declare the function in the .cu file
    void vectorAddition(const float* a, const float* b, float* c, int n);
    
    /**
     * Prints the first n elements of a to the debug output as a single
     * string of the form "(x0, x1, ..., xk)".
     *
     * @param a  array holding at least n floats
     * @param n  number of elements to print; 0 prints "()"
     */
    void printArray(const float* a, const unsigned int n) {
        QString s = "(";
        // Iterate up to n itself and emit the separator before every element
        // but the first: with an unsigned n, "n - 1" would wrap around for
        // n == 0 and the loop would read far past the end of the array.
        for (unsigned int ii = 0; ii < n; ++ii) {
            if (ii > 0)
                s.append(", ");
            s.append(QString::number(a[ii]));
        }
        s.append(")");
    
        qDebug() << s;
    }
    
    int main(int argc, char* argv [])
    {
        QCoreApplication(argc, argv);
    
        int deviceCount = 0;
        int cudaDevice = 0;
        char cudaDeviceName [100];
    
        unsigned int N = 50;
        float *a, *b, *c;
    
        cuInit(0);
        cuDeviceGetCount(&deviceCount);
        cuDeviceGet(&cudaDevice, 0);
        cuDeviceGetName(cudaDeviceName, 100, cudaDevice);
        qDebug() << "Number of devices: " << deviceCount;
        qDebug() << "Device name:" << cudaDeviceName;
    
        a = new float [N];    b = new float [N];    c = new float [N];
        for (unsigned int ii = 0; ii < N; ++ii) {
            a[ii] = qrand();
            b[ii] = qrand();
        }
    
        // This is the function call in which the kernel is called
        vectorAddition(a, b, c, N);
    
        qDebug() << "input a:"; printArray(a, N);
        qDebug() << "input b:"; printArray(b, N);
        qDebug() << "output c:"; printArray(c, N);
    
        if (a) delete a;
        if (b) delete b;
        if (c) delete c;
    }
    

    The Cuda file vectorAddition.cu, which describes a simple vector addition, looks like this:

    #include <cstdio>
    
    #include <cuda.h>
    #include <builtin_types.h>
    
    // Element-wise vector sum: each thread computes c[i] = a[i] + b[i] for the
    // single index i derived from its block and thread position in a 1D launch.
    // Threads whose index is n or greater do nothing.
    extern "C"
    __global__ void vectorAdditionCUDA(const float* a, const float* b, float* c, int n)
    {
        const int idx = threadIdx.x + blockIdx.x * blockDim.x;
        if (idx >= n)
            return;
        c[idx] = a[idx] + b[idx];
    }
    
    /**
     * Host wrapper: computes c[i] = a[i] + b[i] for i in [0, n) on the GPU.
     *
     * Copies a and b to the device, launches vectorAdditionCUDA with 256
     * threads per block, and copies the result back into c. If any CUDA call
     * fails, the remaining steps are skipped, a message is written to stderr,
     * and c is left unmodified.
     *
     * @param a  host array of n floats (first operand)
     * @param b  host array of n floats (second operand)
     * @param c  host array of n floats receiving the result
     * @param n  number of elements; nothing is done when n <= 0
     */
    void vectorAddition(const float* a, const float* b, float* c, int n) {
        // An empty vector needs no work, and a launch with zero blocks would
        // be rejected as an invalid configuration.
        if (n <= 0)
            return;
    
        float *a_cuda = NULL, *b_cuda = NULL, *c_cuda = NULL;
        const size_t nBytes = sizeof(float) * static_cast<size_t>(n);
        const int threadsPerBlock = 256;
        // Ceiling division written so that it cannot overflow int for large n.
        const int blocksPerGrid   = (n - 1) / threadsPerBlock + 1;
    
        // allocate and copy memory into the device
        cudaError_t err = cudaMalloc((void **)& a_cuda, nBytes);
        if (err == cudaSuccess)
            err = cudaMalloc((void **)& b_cuda, nBytes);
        if (err == cudaSuccess)
            err = cudaMalloc((void **)& c_cuda, nBytes);
        if (err == cudaSuccess)
            err = cudaMemcpy(a_cuda, a, nBytes, cudaMemcpyHostToDevice);
        if (err == cudaSuccess)
            err = cudaMemcpy(b_cuda, b, nBytes, cudaMemcpyHostToDevice);
    
        if (err == cudaSuccess) {
            vectorAdditionCUDA<<<blocksPerGrid, threadsPerBlock>>>(a_cuda, b_cuda, c_cuda, n);
            // A kernel launch returns nothing; launch errors must be queried.
            err = cudaGetLastError();
        }
    
        // load the answer back into the host (this blocking copy also reports
        // errors raised while the kernel was executing)
        if (err == cudaSuccess)
            err = cudaMemcpy(c, c_cuda, nBytes, cudaMemcpyDeviceToHost);
    
        if (err != cudaSuccess)
            fprintf(stderr, "vectorAddition: CUDA error: %s\n", cudaGetErrorString(err));
    
        // Always release what was allocated; cudaFree on a null pointer does nothing.
        cudaFree(a_cuda);
        cudaFree(b_cuda);
        cudaFree(c_cuda);
    }
    

    If you get this to work, then more complicated examples are self-evident, I think.

    Edit (24-1-2013): I added the QMAKE_LFLAGS_DEBUG = /NODEFAULTLIB:msvcrtd.lib line and the CONFIG(debug) branch with the extra -D_DEBUG flag, so that it also compiles in debug mode.

    0 讨论(0)
  • 2020-11-29 02:56

    I tried to get this combination to work, but could not because of a number of dependencies in my project. My final solution was to break the application into two separate applications on Windows:

    1. CUDA application developed in VC and running as a service/DLL in Windows
    2. GUI interface developed in QT and using the DLL for CUDA related tasks.

    Hope this saves others some time.

    0 讨论(0)
提交回复
热议问题