From 2d4dbb27b3387159287327c3604fd5c258c0590f Mon Sep 17 00:00:00 2001 From: Kenneth Jao Date: Fri, 16 Jun 2023 02:07:49 -0500 Subject: [PATCH] Updated directory structure and Makefile --- Array.h => include/CudaTools/Array.h | 12 +++++++----- BLAS.h => include/CudaTools/BLAS.h | 0 Core.h => include/CudaTools/Core.h | 13 +++++-------- Macros.h => include/CudaTools/Macros.h | 0 Types.h => include/CudaTools/Types.h | 0 samples/1_CoreKernel/Makefile | 6 +++--- samples/2_CoreClass/Makefile | 6 +++--- samples/3_ArrayKernel/Makefile | 6 +++--- samples/4_ArrayFunctions/Makefile | 6 +++--- samples/5_SimpleGraph/Makefile | 6 +++--- Makefile => tests/Makefile | 8 ++++---- tests.cu.cpp => tests/tests.cu.cpp | 8 ++++---- 12 files changed, 35 insertions(+), 36 deletions(-) rename Array.h => include/CudaTools/Array.h (99%) rename BLAS.h => include/CudaTools/BLAS.h (100%) rename Core.h => include/CudaTools/Core.h (98%) rename Macros.h => include/CudaTools/Macros.h (100%) rename Types.h => include/CudaTools/Types.h (100%) rename Makefile => tests/Makefile (92%) rename tests.cu.cpp => tests/tests.cu.cpp (99%) diff --git a/Array.h b/include/CudaTools/Array.h similarity index 99% rename from Array.h rename to include/CudaTools/Array.h index f8fbcb7..3772a8d 100644 --- a/Array.h +++ b/include/CudaTools/Array.h @@ -187,7 +187,7 @@ template class Array { uint32_t mEndOffset = 0; - void freeArrays() { + HD void freeArrays() { #ifndef DEVICE if (not mIsView) { if (pDevice != nullptr) CudaTools::free(pDevice); @@ -534,7 +534,7 @@ template class Array { /** * Copies this Array and returns a new Array with the same memory. */ - HD Array copy() const { + Array copy() const { Array arr(mShape, (pDevice == nullptr)); auto arr_it = arr.begin(); @@ -780,9 +780,11 @@ template std::ostream& operator<<(std::ostream& out, const Array bool negative = false; for (auto it = arr.begin(); it != arr.end(); ++it) { T val = *it; - if (*it < 0) { - negative = true; - val *= -1; + if constexpr (not std::is_unsigned::value) { + if (*it < 0) { + negative = true; + val *= -1; + } } max_val = (val > max_val) ? val : max_val; } diff --git a/BLAS.h b/include/CudaTools/BLAS.h similarity index 100% rename from BLAS.h rename to include/CudaTools/BLAS.h diff --git a/Core.h b/include/CudaTools/Core.h similarity index 98% rename from Core.h rename to include/CudaTools/Core.h index 17148c7..d6f16c5 100644 --- a/Core.h +++ b/include/CudaTools/Core.h @@ -256,8 +256,8 @@ template class Graph { ~Graph() { #ifdef CUDACC - CUDA_CHECK(cudaGraphDestroy(mGraph)); - CUDA_CHECK(cudaGraphExecDestroy(mInstance)); + cudaGraphDestroy(mGraph); + cudaGraphExecDestroy(mInstance); #endif }; @@ -400,9 +400,9 @@ Manager::Manager(const std::vector& names) { Manager::~Manager() { #ifdef CUDACC for (auto& it : mStreams) { - CUDA_CHECK(cudaStreamDestroy(it.second)); + cudaStreamDestroy(it.second); } - CUBLAS_CHECK(cublasDestroy(mCublas)); + cublasDestroy(mCublas); // CUSPARSE_CHECK(cusparseDestroy(mCusparse)); #endif } @@ -640,7 +640,7 @@ Event::Event() { Event::~Event() { #ifdef CUDACC - CUDA_CHECK(cudaEventDestroy(mEvent)); + cudaEventDestroy(mEvent); #endif } @@ -656,9 +656,6 @@ void Event::record(const StreamID& stream) { GraphManager::~GraphManager() { #ifdef CUDACC - for (void* func : mHostData) { - delete func; - } for (Event* event : mEvents) { delete event; } diff --git a/Macros.h b/include/CudaTools/Macros.h similarity index 100% rename from Macros.h rename to include/CudaTools/Macros.h diff --git a/Types.h b/include/CudaTools/Types.h similarity index 100% rename from Types.h rename to include/CudaTools/Types.h diff --git a/samples/1_CoreKernel/Makefile b/samples/1_CoreKernel/Makefile index 6725ac7..91cdeda 100644 --- a/samples/1_CoreKernel/Makefile +++ b/samples/1_CoreKernel/Makefile @@ -1,9 +1,9 @@ CC := g++-10 NVCC := nvcc -CFLAGS := -Wall -std=c++17 -fopenmp -MMD -NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler +CFLAGS := -std=c++17 -MMD -Wall -fopenmp +NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp -INCLUDE := ../../ +INCLUDE := ../../include/CudaTools LIBS_DIR := LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS := diff --git a/samples/2_CoreClass/Makefile b/samples/2_CoreClass/Makefile index b4cea59..94a165a 100644 --- a/samples/2_CoreClass/Makefile +++ b/samples/2_CoreClass/Makefile @@ -1,9 +1,9 @@ CC := g++-10 NVCC := nvcc -CFLAGS := -Wall -std=c++17 -fopenmp -MMD -NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler +CFLAGS := -std=c++17 -MMD -Wall -fopenmp +NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp -INCLUDE := ../../ +INCLUDE := ../../include/CudaTools LIBS_DIR := LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS := diff --git a/samples/3_ArrayKernel/Makefile b/samples/3_ArrayKernel/Makefile index 32b8583..fa78919 100644 --- a/samples/3_ArrayKernel/Makefile +++ b/samples/3_ArrayKernel/Makefile @@ -1,9 +1,9 @@ CC := g++-10 NVCC := nvcc -CFLAGS := -Wall -std=c++17 -fopenmp -MMD -NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler +CFLAGS := -std=c++17 -MMD -Wall -fopenmp +NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp -INCLUDE := ../../ +INCLUDE := ../../include/CudaTools LIBS_DIR := LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS := diff --git a/samples/4_ArrayFunctions/Makefile b/samples/4_ArrayFunctions/Makefile index cf48f56..7465e88 100644 --- a/samples/4_ArrayFunctions/Makefile +++ b/samples/4_ArrayFunctions/Makefile @@ -1,9 +1,9 @@ CC := g++-10 NVCC := nvcc -CFLAGS := -Wall -std=c++17 -fopenmp -MMD -NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler +CFLAGS := -std=c++17 -MMD -Wall -fopenmp +NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp -INCLUDE := ../../ +INCLUDE := ../../include/CudaTools LIBS_DIR := LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS := diff --git a/samples/5_SimpleGraph/Makefile b/samples/5_SimpleGraph/Makefile index 74d9d7f..8265fc1 100644 --- a/samples/5_SimpleGraph/Makefile +++ b/samples/5_SimpleGraph/Makefile @@ -1,9 +1,9 @@ CC := g++-10 NVCC := nvcc -CFLAGS := -Wall -std=c++17 -fopenmp -MMD -NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler +CFLAGS := -std=c++17 -MMD -Wall -fopenmp +NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp -INCLUDE := ../../ +INCLUDE := ../../include/CudaTools LIBS_DIR := LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS := diff --git a/Makefile b/tests/Makefile similarity index 92% rename from Makefile rename to tests/Makefile index 91c8eae..d6c9864 100644 --- a/Makefile +++ b/tests/Makefile @@ -1,9 +1,9 @@ CC := g++-10 NVCC := nvcc -CFLAGS := -Wall -std=c++17 -fopenmp -MMD -NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler +CFLAGS := -std=c++17 -MMD -Wall -fopenmp +NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp -INCLUDE := +INCLUDE := ../include/CudaTools LIBS_DIR := LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS := @@ -74,7 +74,7 @@ $(TARGET)GPU: $(GPU_BUILD_DIR)/link.o $(GPU_GCC_OBJ) | $(GPU_BUILD_DIR) $(CC) -g -DCUDA $(CFLAGS) $(GPU_NVCC_OBJ) $^ -o $@ $(INC) $(LIB) $(LIB_GPU) $(LD) $(LD_GPU) $(GPU_BUILD_DIR)/link.o: $(GPU_NVCC_OBJ) | $(GPU_BUILD_DIR) - $(NVCC) --device-link $^ -o $@ + $(NVCC) --device-link -lgomp $^ -o $@ $(GPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cu.cpp | $(GPU_BUILD_DIR) $(NVCC) $(NVCC_FLAGS) -DCUDA -x cu --device-c -o $@ $< $(INC) diff --git a/tests.cu.cpp b/tests/tests.cu.cpp similarity index 99% rename from tests.cu.cpp rename to tests/tests.cu.cpp index e1d8974..36b3a7a 100644 --- a/tests.cu.cpp +++ b/tests/tests.cu.cpp @@ -1,9 +1,9 @@ #define CUDATOOLS_IMPLEMENTATION #define CUDATOOLS_ARRAY_MAX_AXES 8 -#include "Array.h" -#include "BLAS.h" -#include "Core.h" -#include "Types.h" +#include +#include +#include +#include #include #include