Updated directory structure and Makefile

main
Kenneth Jao 2 years ago
parent 89c6bde219
commit 2d4dbb27b3
  1. 12
      include/CudaTools/Array.h
  2. 0
      include/CudaTools/BLAS.h
  3. 13
      include/CudaTools/Core.h
  4. 0
      include/CudaTools/Macros.h
  5. 0
      include/CudaTools/Types.h
  6. 6
      samples/1_CoreKernel/Makefile
  7. 6
      samples/2_CoreClass/Makefile
  8. 6
      samples/3_ArrayKernel/Makefile
  9. 6
      samples/4_ArrayFunctions/Makefile
  10. 6
      samples/5_SimpleGraph/Makefile
  11. 8
      tests/Makefile
  12. 8
      tests/tests.cu.cpp

@ -187,7 +187,7 @@ template <typename T> class Array {
uint32_t mEndOffset = 0; uint32_t mEndOffset = 0;
void freeArrays() { HD void freeArrays() {
#ifndef DEVICE #ifndef DEVICE
if (not mIsView) { if (not mIsView) {
if (pDevice != nullptr) CudaTools::free(pDevice); if (pDevice != nullptr) CudaTools::free(pDevice);
@ -534,7 +534,7 @@ template <typename T> class Array {
/** /**
* Copies this Array and returns a new Array with the same memory. * Copies this Array and returns a new Array with the same memory.
*/ */
HD Array copy() const { Array copy() const {
Array<T> arr(mShape, (pDevice == nullptr)); Array<T> arr(mShape, (pDevice == nullptr));
auto arr_it = arr.begin(); auto arr_it = arr.begin();
@ -780,9 +780,11 @@ template <typename T> std::ostream& operator<<(std::ostream& out, const Array<T>
bool negative = false; bool negative = false;
for (auto it = arr.begin(); it != arr.end(); ++it) { for (auto it = arr.begin(); it != arr.end(); ++it) {
T val = *it; T val = *it;
if (*it < 0) { if constexpr (not std::is_unsigned<T>::value) {
negative = true; if (*it < 0) {
val *= -1; negative = true;
val *= -1;
}
} }
max_val = (val > max_val) ? val : max_val; max_val = (val > max_val) ? val : max_val;
} }

@ -256,8 +256,8 @@ template <typename F, typename... Args> class Graph {
~Graph() { ~Graph() {
#ifdef CUDACC #ifdef CUDACC
CUDA_CHECK(cudaGraphDestroy(mGraph)); cudaGraphDestroy(mGraph);
CUDA_CHECK(cudaGraphExecDestroy(mInstance)); cudaGraphExecDestroy(mInstance);
#endif #endif
}; };
@ -400,9 +400,9 @@ Manager::Manager(const std::vector<std::string>& names) {
Manager::~Manager() { Manager::~Manager() {
#ifdef CUDACC #ifdef CUDACC
for (auto& it : mStreams) { for (auto& it : mStreams) {
CUDA_CHECK(cudaStreamDestroy(it.second)); cudaStreamDestroy(it.second);
} }
CUBLAS_CHECK(cublasDestroy(mCublas)); cublasDestroy(mCublas);
// CUSPARSE_CHECK(cusparseDestroy(mCusparse)); // CUSPARSE_CHECK(cusparseDestroy(mCusparse));
#endif #endif
} }
@ -640,7 +640,7 @@ Event::Event() {
Event::~Event() { Event::~Event() {
#ifdef CUDACC #ifdef CUDACC
CUDA_CHECK(cudaEventDestroy(mEvent)); cudaEventDestroy(mEvent);
#endif #endif
} }
@ -656,9 +656,6 @@ void Event::record(const StreamID& stream) {
GraphManager::~GraphManager() { GraphManager::~GraphManager() {
#ifdef CUDACC #ifdef CUDACC
for (void* func : mHostData) {
delete func;
}
for (Event* event : mEvents) { for (Event* event : mEvents) {
delete event; delete event;
} }

@ -1,9 +1,9 @@
CC := g++-10 CC := g++-10
NVCC := nvcc NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../ INCLUDE := ../../include/CudaTools
LIBS_DIR := LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS := LIBS :=

@ -1,9 +1,9 @@
CC := g++-10 CC := g++-10
NVCC := nvcc NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../ INCLUDE := ../../include/CudaTools
LIBS_DIR := LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS := LIBS :=

@ -1,9 +1,9 @@
CC := g++-10 CC := g++-10
NVCC := nvcc NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../ INCLUDE := ../../include/CudaTools
LIBS_DIR := LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS := LIBS :=

@ -1,9 +1,9 @@
CC := g++-10 CC := g++-10
NVCC := nvcc NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../ INCLUDE := ../../include/CudaTools
LIBS_DIR := LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS := LIBS :=

@ -1,9 +1,9 @@
CC := g++-10 CC := g++-10
NVCC := nvcc NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../ INCLUDE := ../../include/CudaTools
LIBS_DIR := LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS := LIBS :=

@ -1,9 +1,9 @@
CC := g++-10 CC := g++-10
NVCC := nvcc NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := INCLUDE := ../include/CudaTools
LIBS_DIR := LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64 LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS := LIBS :=
@ -74,7 +74,7 @@ $(TARGET)GPU: $(GPU_BUILD_DIR)/link.o $(GPU_GCC_OBJ) | $(GPU_BUILD_DIR)
$(CC) -g -DCUDA $(CFLAGS) $(GPU_NVCC_OBJ) $^ -o $@ $(INC) $(LIB) $(LIB_GPU) $(LD) $(LD_GPU) $(CC) -g -DCUDA $(CFLAGS) $(GPU_NVCC_OBJ) $^ -o $@ $(INC) $(LIB) $(LIB_GPU) $(LD) $(LD_GPU)
$(GPU_BUILD_DIR)/link.o: $(GPU_NVCC_OBJ) | $(GPU_BUILD_DIR) $(GPU_BUILD_DIR)/link.o: $(GPU_NVCC_OBJ) | $(GPU_BUILD_DIR)
$(NVCC) --device-link $^ -o $@ $(NVCC) --device-link -lgomp $^ -o $@
$(GPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cu.cpp | $(GPU_BUILD_DIR) $(GPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cu.cpp | $(GPU_BUILD_DIR)
$(NVCC) $(NVCC_FLAGS) -DCUDA -x cu --device-c -o $@ $< $(INC) $(NVCC) $(NVCC_FLAGS) -DCUDA -x cu --device-c -o $@ $< $(INC)

@ -1,9 +1,9 @@
#define CUDATOOLS_IMPLEMENTATION #define CUDATOOLS_IMPLEMENTATION
#define CUDATOOLS_ARRAY_MAX_AXES 8 #define CUDATOOLS_ARRAY_MAX_AXES 8
#include "Array.h" #include <Array.h>
#include "BLAS.h" #include <BLAS.h>
#include "Core.h" #include <Core.h>
#include "Types.h" #include <Types.h>
#include <Eigen/Core> #include <Eigen/Core>
#include <chrono> #include <chrono>
Loading…
Cancel
Save