Updated directory structure and Makefile

main
Kenneth Jao 2 years ago
parent 89c6bde219
commit 2d4dbb27b3
  1. 6
      include/CudaTools/Array.h
  2. 0
      include/CudaTools/BLAS.h
  3. 13
      include/CudaTools/Core.h
  4. 0
      include/CudaTools/Macros.h
  5. 0
      include/CudaTools/Types.h
  6. 6
      samples/1_CoreKernel/Makefile
  7. 6
      samples/2_CoreClass/Makefile
  8. 6
      samples/3_ArrayKernel/Makefile
  9. 6
      samples/4_ArrayFunctions/Makefile
  10. 6
      samples/5_SimpleGraph/Makefile
  11. 8
      tests/Makefile
  12. 8
      tests/tests.cu.cpp

@ -187,7 +187,7 @@ template <typename T> class Array {
uint32_t mEndOffset = 0;
void freeArrays() {
HD void freeArrays() {
#ifndef DEVICE
if (not mIsView) {
if (pDevice != nullptr) CudaTools::free(pDevice);
@ -534,7 +534,7 @@ template <typename T> class Array {
/**
* Copies this Array and returns a new Array with the same memory.
*/
HD Array copy() const {
Array copy() const {
Array<T> arr(mShape, (pDevice == nullptr));
auto arr_it = arr.begin();
@ -780,10 +780,12 @@ template <typename T> std::ostream& operator<<(std::ostream& out, const Array<T>
bool negative = false;
for (auto it = arr.begin(); it != arr.end(); ++it) {
T val = *it;
if constexpr (not std::is_unsigned<T>::value) {
if (*it < 0) {
negative = true;
val *= -1;
}
}
max_val = (val > max_val) ? val : max_val;
}
width = std::to_string(max_val).size() + 1;

@ -256,8 +256,8 @@ template <typename F, typename... Args> class Graph {
~Graph() {
#ifdef CUDACC
CUDA_CHECK(cudaGraphDestroy(mGraph));
CUDA_CHECK(cudaGraphExecDestroy(mInstance));
cudaGraphDestroy(mGraph);
cudaGraphExecDestroy(mInstance);
#endif
};
@ -400,9 +400,9 @@ Manager::Manager(const std::vector<std::string>& names) {
Manager::~Manager() {
#ifdef CUDACC
for (auto& it : mStreams) {
CUDA_CHECK(cudaStreamDestroy(it.second));
cudaStreamDestroy(it.second);
}
CUBLAS_CHECK(cublasDestroy(mCublas));
cublasDestroy(mCublas);
// CUSPARSE_CHECK(cusparseDestroy(mCusparse));
#endif
}
@ -640,7 +640,7 @@ Event::Event() {
Event::~Event() {
#ifdef CUDACC
CUDA_CHECK(cudaEventDestroy(mEvent));
cudaEventDestroy(mEvent);
#endif
}
@ -656,9 +656,6 @@ void Event::record(const StreamID& stream) {
GraphManager::~GraphManager() {
#ifdef CUDACC
for (void* func : mHostData) {
delete func;
}
for (Event* event : mEvents) {
delete event;
}

@ -1,9 +1,9 @@
CC := g++-10
NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler
CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../
INCLUDE := ../../include/CudaTools
LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS :=

@ -1,9 +1,9 @@
CC := g++-10
NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler
CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../
INCLUDE := ../../include/CudaTools
LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS :=

@ -1,9 +1,9 @@
CC := g++-10
NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler
CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../
INCLUDE := ../../include/CudaTools
LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS :=

@ -1,9 +1,9 @@
CC := g++-10
NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler
CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../
INCLUDE := ../../include/CudaTools
LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS :=

@ -1,9 +1,9 @@
CC := g++-10
NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler
CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE := ../../
INCLUDE := ../../include/CudaTools
LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS :=

@ -1,9 +1,9 @@
CC := g++-10
NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD
NVCC_FLAGS := -MMD -std=c++17 -w -Xcompiler
CFLAGS := -std=c++17 -MMD -Wall -fopenmp
NVCC_FLAGS := -std=c++17 -MMD -Xcudafe="--diag_suppress=20012" -Xcompiler -fopenmp
INCLUDE :=
INCLUDE := ../include/CudaTools
LIBS_DIR :=
LIBS_DIR_GPU := /usr/local/cuda/lib64
LIBS :=
@ -74,7 +74,7 @@ $(TARGET)GPU: $(GPU_BUILD_DIR)/link.o $(GPU_GCC_OBJ) | $(GPU_BUILD_DIR)
$(CC) -g -DCUDA $(CFLAGS) $(GPU_NVCC_OBJ) $^ -o $@ $(INC) $(LIB) $(LIB_GPU) $(LD) $(LD_GPU)
$(GPU_BUILD_DIR)/link.o: $(GPU_NVCC_OBJ) | $(GPU_BUILD_DIR)
$(NVCC) --device-link $^ -o $@
$(NVCC) --device-link -lgomp $^ -o $@
$(GPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cu.cpp | $(GPU_BUILD_DIR)
$(NVCC) $(NVCC_FLAGS) -DCUDA -x cu --device-c -o $@ $< $(INC)

@ -1,9 +1,9 @@
#define CUDATOOLS_IMPLEMENTATION
#define CUDATOOLS_ARRAY_MAX_AXES 8
#include "Array.h"
#include "BLAS.h"
#include "Core.h"
#include "Types.h"
#include <Array.h>
#include <BLAS.h>
#include <Core.h>
#include <Types.h>
#include <Eigen/Core>
#include <chrono>
Loading…
Cancel
Save