CMakeLists.txt
cmake_minimum_required(VERSION 2.6)
project(cuda_test)
find_package(CUDA REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
CUDA_ADD_EXECUTABLE(test_cuda main.cpp test_cuda_fun.cu)
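main.cpp and test_cuda_fun.cu are not shown in the original post; below is a minimal sketch of what they might contain (the wrapper name run_test_kernel and the kernel fill_index are assumptions for illustration, not part of the original project). The key point is that the .cpp file only sees an ordinary C++ declaration, while nvcc compiles the .cu file that defines it.

test_cuda_fun.cu
#include <cuda_runtime.h>
#include <cstdio>

// trivial kernel: each thread writes its global index into the output array
__global__ void fill_index(int* data, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) data[i] = i;
}

// ordinary C++ function compiled by nvcc, so it can be called from main.cpp
void run_test_kernel() {
    const int n = 16;
    int host[n];
    int* dev = nullptr;
    cudaMalloc((void**)&dev, n * sizeof(int));
    fill_index<<<1, n>>>(dev, n);
    cudaMemcpy(host, dev, n * sizeof(int), cudaMemcpyDeviceToHost);
    cudaFree(dev);
    for (int i = 0; i < n; i++) printf("%d ", host[i]);
    printf("\n");
}

main.cpp
// only a declaration is needed here; the definition lives in test_cuda_fun.cu
void run_test_kernel();

int main() {
    run_test_kernel();
    return 0;
}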
VS2013 VC++: calling functions defined in a CUDA .cu file from a .cpp file
https://www.cnblogs.com/betterwgo/p/6843272.html
C++ calling CUDA (based on VS2015), MATLAB calling CUDA, and MATLAB calling C++: study notes
https://blog.csdn.net/penkgao/article/details/78473415
CUDA configuration: calling .cu code from .cpp files
http://ghx0x0.github.io/2015/02/25/cuda-vs2010-configure-cuda/
CUDA + VS2015 configuration
https://blog.csdn.net/qq_38314702/article/details/994143931 ---------- works well
VS2017 + CUDA 9.2: no CUDA option when creating a new project
https://blog.csdn.net/JiangNingmm/article/details/100849115
Win10 64-bit: VS2015 + CUDA 9.1 configuration
https://blog.csdn.net/sinat_41789705/article/details/83592783
VS2015 + CUDA 9.0 + Win10 programming environment configuration (fixing the missing CUDA module in VS)
https://blog.csdn.net/qq_38109843/article/details/89429441 ---------- works well
Although the CUDA project template still did not show up in New Project,
CUDA can still be enabled for the project via:
right-click the cuda_test project -> Build Dependencies -> Build Customizations, and check "CUDA 9.0".
Then configure the project.
The following environment variables are usually added automatically by the CUDA installer:
CUDA_PATH = C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0
CUDA_PATH_V9_0 = C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0
CUDA_SDK_PATH = C:\ProgramData\NVIDIA Corporation\CUDA Samples\v9.0
CUDA_LIB_PATH = %CUDA_PATH%\lib\x64
CUDA_BIN_PATH = %CUDA_PATH%\bin
CUDA_SDK_BIN_PATH = %CUDA_SDK_PATH%\bin\win64
CUDA_SDK_LIB_PATH = %CUDA_SDK_PATH%\common\lib\x64
Include directories: $(CUDA_PATH)\include
Library directories: $(CUDA_PATH)\lib\x64
Additional dependencies (Linker -> Input): cublas.lib cuda.lib cudadevrt.lib cudart.lib cudart_static.lib nvcuvid.lib OpenCL.lib
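With the include directory, library directory, and additional dependencies above in place, a short check like the following (not part of the original post; it only uses the standard CUDA runtime API) can confirm that the project compiles and links against cudart correctly:

#include <cuda_runtime.h>
#include <iostream>

int main() {
    int device_count = 0, runtime_ver = 0, driver_ver = 0;
    cudaGetDeviceCount(&device_count);    // number of CUDA-capable GPUs visible
    cudaRuntimeGetVersion(&runtime_ver);  // e.g. 9000 for CUDA 9.0
    cudaDriverGetVersion(&driver_ver);
    std::cout << "devices: " << device_count
              << ", runtime: " << runtime_ver
              << ", driver: " << driver_ver << std::endl;
    return 0;
}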
kernel.h
#pragma once
#include "cuda_runtime.h"
#include "device_functions.h"
#include "cublas_v2.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <ctime>
#include <windows.h>
#include <math.h>
#include <queue>
using namespace std;
int test_main();
kernel.cu
#include "kernel.h"
struct Point {
float x;
float y;
int cluster;
int noise; //-1 = noise / not yet a core point; >= 0 = core point
};
int eps = 2;//neighborhood radius
int min_nb = 3;//minimum number of neighbors for a core point
Point host_sample[500];//holds at most 500 input points (the original comment notes 312)
int block_num, thread_num;
float __device__ dev_euclidean_distance(const Point &src, const Point &dest) {
float res = (src.x - dest.x) * (src.x - dest.x) + (src.y - dest.y) * (src.y - dest.y);
return sqrt(res);
}
/*phase 1: fill the symmetric num x num neighbor matrix; phase 2: mark points with at least min_nb neighbors as core points*/
void __global__ dev_region_query(Point* sample, int num, int* neighbors, int eps, int min_nb) {
unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int line,col,pointer = tid;
unsigned int count;
while (pointer < num * num) {//grid-stride loop; tid is unique across the whole grid
line = pointer / num;
col = pointer % num;
float radius;
if (line <= col) {
radius = dev_euclidean_distance(sample[line], sample[col]);
if (radius < eps) {
neighbors[pointer] = 1;
}
neighbors[col * num + line] = neighbors[pointer];//mirror across the diagonal (the matrix is symmetric)
}
pointer += blockDim.x * gridDim.x;
}
__syncthreads();
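// note: __syncthreads() only synchronizes threads within a single block; with more than one block,
// matrix entries written by other blocks are not guaranteed to be visible yet, so a grid-wide
// barrier (e.g. splitting the two phases into separate kernel launches) would be the safer design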
pointer = tid;
while (pointer < num) {
count = 0;
line = pointer * num;
for (int i = 0; i < num; i++) {
if (pointer != i && neighbors[line+i]) {//count neighbors of the point, excluding the point itself
count++;
}
}
if (count >= min_nb) {
sample[pointer].noise++;
}
pointer += blockDim.x * gridDim.x;
}
}
void host_algorithm_dbscan(Point* host_sample, int num) {
/*sample*/
Point* cuda_sample;
cudaMalloc((void**)&cuda_sample, num * sizeof(Point));
cudaMemcpy(cuda_sample, host_sample, num * sizeof(Point), cudaMemcpyHostToDevice);
/*neighbor list*/
int *host_neighbor = new int[num*num]();
int *dev_neighbor;
cudaMalloc((void**)&dev_neighbor, num * num * sizeof(int));
cudaMemset(dev_neighbor, 0, num * num * sizeof(int));//the kernel only writes entries within eps, so the matrix must start zeroed
dev_region_query<<<block_num, thread_num>>>(cuda_sample, num, dev_neighbor, eps, min_nb);
cudaMemcpy(host_sample, cuda_sample, num * sizeof(Point), cudaMemcpyDeviceToHost);
cudaMemcpy(host_neighbor, dev_neighbor, num * num * sizeof(int), cudaMemcpyDeviceToHost);
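/*host-side cluster expansion: every core point (noise >= 0 after the kernel) that has no cluster yet starts a new cluster, and its neighborhood is grown breadth-first via the queue*/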
queue<int> expand;
int cur_cluster = 0;
for (int i = 0; i < num; i++) {
if (host_sample[i].noise >= 0 && host_sample[i].cluster < 1) {
host_sample[i].cluster = ++cur_cluster;
int src = i * num;
for (int j = 0; j < num; j++) {
if (host_neighbor[src + j]) {
host_sample[j].cluster = cur_cluster;
expand.push(j);
}
}
while (!expand.empty()) {/*expand the cluster*/
if (host_sample[expand.front()].noise >= 0) {
src = expand.front() * num;
for (int j = 0; j < num; j++) {
if (host_neighbor[src + j] && host_sample[j].cluster < 1) {
host_sample[j].cluster = cur_cluster;
expand.push(j);
}
}
}
expand.pop();
}
}
}
delete[] host_neighbor;
cudaFree(cuda_sample); cudaFree(dev_neighbor);
}
int test_main() {
clock_t starts, finishs;
double duration;
starts = clock();
ifstream fin("3spiral.txt");
ofstream fout;
fout.open("result.txt");
int sample_num = 0;
double a, b;
while (sample_num < 500 && fin >> a >> b) {//host_sample holds at most 500 points
host_sample[sample_num].x = a;
host_sample[sample_num].y = b;
host_sample[sample_num].noise = -1;
host_sample[sample_num].cluster = -1;
sample_num++;
}
cout << "------>TOTAL SAMPLE NUMB0->" << sample_num << "<-----" << endl;
cout << "------>BL0CK=10 & THREAD=100<-------- "<< endl;
block_num = 10;
thread_num = 100;
cout<<"CALCULATING BY CUDA GTX TITAN X......\n"<<endl;
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventCreate(&end);
cudaEventRecord(start, 0);
host_algorithm_dbscan(host_sample, sample_num);
cudaEventRecord(end, 0);
cudaEventSynchronize(end);
float time;
cudaEventElapsedTime(&time, start, end);
cudaEventDestroy(start);
cudaEventDestroy(end);
cout << "time: " << time << " ms --device\n" << endl;
finishs = clock();
duration = (double)(finishs - starts) / CLOCKS_PER_SEC;
cout << duration << "s --total" << endl;
for (int i = 0; i < sample_num; i++) {
fout <<"["<<host_sample[i].x << "," << host_sample[i].y << "] -->"<<host_sample[i].cluster<< endl;
}
fout.close();
system("pause");
return 0;
}
main.cpp
#include "kernel.h"
int main(int argc, char* argv[])
{
test_main();
return 0;
}
Getting started with CUDA development on Ubuntu 16.04 (Part 1)
https://blog.csdn.net/u011988573/article/details/69665098
Getting started with CUDA development on Ubuntu 16.04 (Part 2)
https://blog.csdn.net/u011988573/article/details/69787614
Mixed-compiler programming with nvcc, gcc, and g++ (see the command sketch after this list of links)
https://blog.csdn.net/bendanban/article/details/8518382
Configuring Qt Creator to run CUDA programs on Ubuntu 16.04
https://blog.csdn.net/u013554213/article/details/88838137
Qt .pro configuration in detail
https://blog.csdn.net/liang19890820/article/details/51774724
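As a reference for the nvcc/gcc/g++ link above, the usual mixed-compiler workflow on Linux is to compile the .cu file with nvcc, the .cpp file with g++, and then link against the CUDA runtime. A sketch, assuming the file names from the CMakeLists.txt at the top and a default CUDA install under /usr/local/cuda:

nvcc -c test_cuda_fun.cu -o test_cuda_fun.o
g++ -c main.cpp -o main.o
g++ main.o test_cuda_fun.o -o cuda_test -L/usr/local/cuda/lib64 -lcudart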
Source: CSDN
Author: 知识在于分享
Link: https://blog.csdn.net/baidu_40840693/article/details/103795175