|
|
@ -2,13 +2,16 @@ |
|
|
|
#define CUDATOOLS_H |
|
|
|
#define CUDATOOLS_H |
|
|
|
|
|
|
|
|
|
|
|
#include "Macros.h"
#include <functional>
#include <iostream>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
|
|
|
|
|
|
|
|
|
|
|
namespace CudaTools { |
|
|
|
namespace CudaTools { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct Event; |
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* Simple wrapper for the name of a stream. Its purpose is to allow for |
|
|
|
* Simple wrapper for the name of a stream. Its purpose is to allow for |
|
|
|
* 'streams' to be passed on host code, and allowing for simple syntax |
|
|
|
* 'streams' to be passed on host code, and allowing for simple syntax |
|
|
@ -16,18 +19,19 @@ namespace CudaTools { |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
struct StreamID { |
|
|
|
struct StreamID { |
|
|
|
public: |
|
|
|
public: |
|
|
|
std::string id; |
|
|
|
std::string mId; |
|
|
|
StreamID() : id(""){}; |
|
|
|
StreamID() : mId(""){}; |
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* The constructor for a StreamID. |
|
|
|
* The constructor for a StreamID. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
StreamID(const std::string& id_) : id(id_){}; |
|
|
|
StreamID(const std::string& id_) : mId(id_){}; |
|
|
|
StreamID(const char* id_) : id(id_){}; |
|
|
|
StreamID(const char* id_) : mId(id_){}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void wait() const; /**< Makes host wait for this stream. */ |
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* Waits for the stream with this stream ID. |
|
|
|
* Makes this stream wait for this event. Does not block the host. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
void wait() const; |
|
|
|
void wait(const Event& event) const; |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
static const StreamID DEF_MEM_STREAM = StreamID{"defaultMemory"}; |
|
|
|
static const StreamID DEF_MEM_STREAM = StreamID{"defaultMemory"}; |
|
|
@ -137,6 +141,20 @@ struct Settings { |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
Settings basic(const size_t threads, const StreamID& stream = DEF_KERNEL_STREAM); |
|
|
|
Settings basic(const size_t threads, const StreamID& stream = DEF_KERNEL_STREAM); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Launches a kernel with the provided function, settings and its arguments. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename F, typename... Args> |
|
|
|
|
|
|
|
StreamID launch(F func, const Kernel::Settings& sett, Args... args) { |
|
|
|
|
|
|
|
#ifdef CUDA |
|
|
|
|
|
|
|
func<<<sett.blockGrid, sett.threadBlock, sett.sharedMemoryBytes, |
|
|
|
|
|
|
|
Manager::get()->stream(sett.stream.mId)>>>(args...); |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
func(args...); |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
return sett.stream; |
|
|
|
|
|
|
|
} |
|
|
|
}; // namespace Kernel
|
|
|
|
}; // namespace Kernel
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T> class Array; |
|
|
|
template <typename T> class Array; |
|
|
@ -186,29 +204,143 @@ class Shape { |
|
|
|
|
|
|
|
|
|
|
|
std::ostream& operator<<(std::ostream& out, const Shape& s); |
|
|
|
std::ostream& operator<<(std::ostream& out, const Shape& s); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* A simple class that manages a CUDA Event. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
struct Event { |
|
|
|
|
|
|
|
#ifdef CUDACC |
|
|
|
|
|
|
|
cudaEvent_t mEvent; |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
Event(); |
|
|
|
|
|
|
|
~Event(); |
|
|
|
|
|
|
|
void record(const StreamID& stream); /**< Records a event from a stream. */ |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Bundles a callable together with the arguments it should be called with, so that the
 * pair can be handed around as a single untyped pointer and invoked later through run().
 */
template <typename F, typename... Args> struct FuncHolder {
    F mFunc;                   /**< The callable to invoke. */
    std::tuple<Args...> mArgs; /**< The arguments the callable is invoked with. */

    FuncHolder() = delete;

    /**
     * Stores the callable and its arguments.
     * The tuple is built directly from the arguments rather than through a temporary
     * std::make_tuple result, which avoids an extra copy of every argument and also
     * lets reference types in Args bind to the caller's objects.
     * \param func the callable to store.
     * \param args the arguments to store.
     */
    FuncHolder(F func, Args... args) : mFunc(func), mArgs(args...) {}

    /**
     * Invokes the stored callable with the stored arguments.
     * \param data pointer to the FuncHolder<F, Args...> to run, passed as void* so that
     * the function matches a plain `void (*)(void*)` callback signature.
     */
    static void run(void* data) {
        auto* holder = static_cast<FuncHolder<F, Args...>*>(data);
        // The stored arguments are passed as lvalues, so the holder can be run repeatedly.
        std::apply([holder](auto&&... args) { holder->mFunc(args...); }, holder->mArgs);
    }
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Accessory struct to deal with host callbacks for CUDA Graphs in a nice fashion. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
struct GraphTools { |
|
|
|
|
|
|
|
std::vector<void*> mHostData; |
|
|
|
|
|
|
|
std::vector<Event*> mEvents; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
~GraphTools(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Within a function that is being stream captured, launch a host function that can |
|
|
|
|
|
|
|
* be captured into the graph. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename F, typename... Args> |
|
|
|
|
|
|
|
void launchHostFunction(const StreamID& stream, F func, Args&&... args) { |
|
|
|
|
|
|
|
#ifdef CUDACC |
|
|
|
|
|
|
|
FuncHolder<F, Args...>* fh = new FuncHolder<F, Args...>(func, args...); |
|
|
|
|
|
|
|
mHostData.push_back((void*)fh); |
|
|
|
|
|
|
|
cudaHostFn_t run_func = fh->run; |
|
|
|
|
|
|
|
CUDA_CHECK(cudaLaunchHostFunc(Manager::get()->stream(stream), run_func, fh)); |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
func(args...); |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Makes a new branch in the graph to be run in parallel by a new stream. |
|
|
|
|
|
|
|
* \param orig_stream the original stream to branch from. |
|
|
|
|
|
|
|
* \param branch_stream the stream of the new branch. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
void makeBranch(const StreamID& orig_stream, const StreamID& branch_stream); |
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Joins a existing branch in the graph to collapse a parallel block. |
|
|
|
|
|
|
|
* \param orig_stream the original stream to join the branch to. |
|
|
|
|
|
|
|
* \param branch_stream the stream of the branch to join. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
void joinBranch(const StreamID& orig_stream, const StreamID& branch_stream); |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* A class that manages CUDA Graphs. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
template <typename F, typename... Args> class Graph { |
|
|
|
|
|
|
|
private: |
|
|
|
|
|
|
|
#ifdef CUDACC |
|
|
|
|
|
|
|
cudaGraph_t mGraph; |
|
|
|
|
|
|
|
cudaGraphExec_t mInstance; |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
FuncHolder<F, Args...> mFuncHolder; |
|
|
|
|
|
|
|
StreamID mStream; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public: |
|
|
|
|
|
|
|
Graph() = delete; |
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* The constructor for a Graph, which captures the function. |
|
|
|
|
|
|
|
* \param func the function to capture. |
|
|
|
|
|
|
|
* \param stream the origin stream to use. |
|
|
|
|
|
|
|
* \param args the arguments of the function. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
Graph(const StreamID& stream, F func, Args... args) |
|
|
|
|
|
|
|
: mFuncHolder(func, args...), mStream(stream) { |
|
|
|
|
|
|
|
#ifdef CUDACC |
|
|
|
|
|
|
|
CUDA_CHECK( |
|
|
|
|
|
|
|
cudaStreamBeginCapture(Manager::get()->stream(mStream), cudaStreamCaptureModeGlobal)); |
|
|
|
|
|
|
|
mFuncHolder.run((void*)&mFuncHolder); |
|
|
|
|
|
|
|
CUDA_CHECK(cudaStreamEndCapture(Manager::get()->stream(mStream), &mGraph)); |
|
|
|
|
|
|
|
CUDA_CHECK(cudaGraphInstantiate(&mInstance, mGraph, NULL, NULL, 0)); |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
~Graph() { |
|
|
|
|
|
|
|
#ifdef CUDACC |
|
|
|
|
|
|
|
CUDA_CHECK(cudaGraphDestroy(mGraph)); |
|
|
|
|
|
|
|
CUDA_CHECK(cudaGraphExecDestroy(mInstance)); |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Executes the instantiated graph, or simply runs the function with provided |
|
|
|
|
|
|
|
* arguments if compiling for CPU. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
StreamID execute() const { |
|
|
|
|
|
|
|
#ifdef CUDACC |
|
|
|
|
|
|
|
cudaGraphLaunch(mInstance, Manager::get()->stream(mStream)); |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
mFuncHolder.run((void*)&mFuncHolder); |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
return mStream; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
}; // namespace CudaTools
|
|
|
|
}; // namespace CudaTools
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef CUDATOOLS_IMPLEMENTATION |
|
|
|
#ifdef CUDATOOLS_IMPLEMENTATION |
|
|
|
|
|
|
|
|
|
|
|
namespace CudaTools { |
|
|
|
namespace CudaTools { |
|
|
|
|
|
|
|
|
|
|
|
template <typename T, typename... Args> |
|
|
|
//////////////////////
|
|
|
|
StreamID runKernel(T func, const Kernel::Settings& sett, Args... args) { |
|
|
|
// StreamID Methods //
|
|
|
|
#ifdef CUDA |
|
|
|
//////////////////////
|
|
|
|
func<<<sett.blockGrid, sett.threadBlock, sett.sharedMemoryBytes, |
|
|
|
|
|
|
|
Manager::get()->stream(sett.stream.id)>>>(args...); |
|
|
|
void StreamID::wait() const { Manager::get()->waitFor(mId); } |
|
|
|
#else |
|
|
|
|
|
|
|
func(args...); |
|
|
|
void StreamID::wait(const Event& event) const { |
|
|
|
|
|
|
|
#ifdef CUDACC |
|
|
|
|
|
|
|
CUDA_CHECK(cudaStreamWaitEvent(Manager::get()->stream(mId), event.mEvent, 0)); |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
return sett.stream; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
////////////////////
|
|
|
|
////////////////////
|
|
|
|
// Memory Methods //
|
|
|
|
// Memory Methods //
|
|
|
|
////////////////////
|
|
|
|
////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
void StreamID::wait() const { Manager::get()->waitFor(id); } |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void* malloc(const size_t size) { |
|
|
|
void* malloc(const size_t size) { |
|
|
|
#ifdef CUDACC |
|
|
|
#ifdef CUDACC |
|
|
|
void* pDevice; |
|
|
|
void* pDevice; |
|
|
@ -228,7 +360,7 @@ void free(void* const pDevice) { |
|
|
|
/**
 * Copies memory from the host to the device asynchronously on the given stream.
 * Does nothing when not compiling for CUDA.
 * \param pHost the host pointer to copy from.
 * \param pDevice the device pointer to copy to.
 * \param size the number of bytes to copy.
 * \param stream the stream to issue the copy on.
 * \return the stream that was passed in.
 */
StreamID push(void* const pHost, void* const pDevice, const size_t size, const StreamID& stream) {
#ifdef CUDACC
    CUDA_CHECK(cudaMemcpyAsync(pDevice, pHost, size, cudaMemcpyHostToDevice,
                               Manager::get()->stream(stream)));
#endif
    return stream;
}
|
|
@ -236,7 +368,7 @@ StreamID push(void* const pHost, void* const pDevice, const size_t size, const S |
|
|
|
/**
 * Copies memory from the device to the host asynchronously on the given stream.
 * Does nothing when not compiling for CUDA.
 * \param pHost the host pointer to copy to.
 * \param pDevice the device pointer to copy from.
 * \param size the number of bytes to copy.
 * \param stream the stream to issue the copy on.
 * \return the stream that was passed in.
 */
StreamID pull(void* const pHost, void* const pDevice, const size_t size, const StreamID& stream) {
#ifdef CUDACC
    CUDA_CHECK(cudaMemcpyAsync(pHost, pDevice, size, cudaMemcpyDeviceToHost,
                               Manager::get()->stream(stream)));
#endif
    return stream;
}
|
|
@ -245,7 +377,7 @@ StreamID deviceCopy(void* const pSrc, void* const pDest, const size_t size, |
|
|
|
const StreamID& stream) { |
|
|
|
const StreamID& stream) { |
|
|
|
#ifdef CUDACC |
|
|
|
#ifdef CUDACC |
|
|
|
CUDA_CHECK(cudaMemcpyAsync(pDest, pSrc, size, cudaMemcpyDeviceToDevice, |
|
|
|
CUDA_CHECK(cudaMemcpyAsync(pDest, pSrc, size, cudaMemcpyDeviceToDevice, |
|
|
|
Manager::get()->stream(stream.id))); |
|
|
|
Manager::get()->stream(stream))); |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
return stream; |
|
|
|
return stream; |
|
|
|
} |
|
|
|
} |
|
|
@ -289,11 +421,11 @@ Manager::~Manager() { |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Synchronizes the stream registered under the given StreamID's name.
 * An unknown name is reported through CT_ERROR. Does nothing when not compiling for CUDA.
 * \param stream the stream to wait for.
 */
void Manager::waitFor(const StreamID& stream) const {
#ifdef CUDACC
    const auto found = mStreams.find(stream.mId);
    if (found == mStreams.end()) {
        CT_ERROR(true, ("Invalid stream " + stream.mId).c_str());
    } else {
        CUDA_CHECK(cudaStreamSynchronize(found->second));
    }
#endif
}
|
|
@ -314,11 +446,11 @@ void Manager::addStream(const std::string& name) { |
|
|
|
|
|
|
|
|
|
|
|
#ifdef CUDACC |
|
|
|
#ifdef CUDACC |
|
|
|
/**
 * Looks up the CUDA stream registered under the given StreamID's name.
 * An unknown name is reported through CT_ERROR.
 * NOTE(review): nothing is returned after CT_ERROR -- presumably it does not return
 * control here; confirm against its definition.
 * \param stream the stream to look up.
 * \return the registered CUDA stream handle.
 */
cudaStream_t Manager::stream(const StreamID& stream) const {
    const auto found = mStreams.find(stream.mId);
    if (found == mStreams.end()) {
        CT_ERROR(true, ("Invalid stream " + stream.mId).c_str());
    } else {
        return found->second;
    }
}
|
|
|
|
|
|
|
|
|
|
@ -407,7 +539,7 @@ void Settings::setSharedMemSize(const size_t bytes) { |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Sets the stream stored in these settings. Does nothing when not compiling for CUDA.
 * \param stream_ the stream to store.
 */
void Settings::setStream(const StreamID& stream_) {
#ifdef CUDACC
    this->stream = stream_;
#endif
}
|
|
|
|
|
|
|
|
|
|
@ -425,7 +557,8 @@ Settings basic(const size_t threads, const StreamID& stream) { |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
return sett; |
|
|
|
return sett; |
|
|
|
} |
|
|
|
} |
|
|
|
} // namespace Kernel
|
|
|
|
|
|
|
|
|
|
|
|
}; // namespace Kernel
|
|
|
|
|
|
|
|
|
|
|
|
/////////////////////
|
|
|
|
/////////////////////
|
|
|
|
// Shape Functions //
|
|
|
|
// Shape Functions //
|
|
|
@ -506,6 +639,57 @@ std::ostream& operator<<(std::ostream& out, const Shape& s) { |
|
|
|
return out << s.dim(s.axes() - 1) << ")"; |
|
|
|
return out << s.dim(s.axes() - 1) << ")"; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
///////////////////
|
|
|
|
|
|
|
|
// Event Methods //
|
|
|
|
|
|
|
|
///////////////////
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Creates the underlying CUDA event. Does nothing when not compiling for CUDA. */
Event::Event() {
#ifdef CUDACC
    CUDA_CHECK(cudaEventCreate(&mEvent));
#endif
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Destroys the underlying CUDA event. Does nothing when not compiling for CUDA. */
Event::~Event() {
#ifdef CUDACC
    CUDA_CHECK(cudaEventDestroy(mEvent));
#endif
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Records this event on the given stream. Does nothing when not compiling for CUDA.
 * \param stream the stream to record the event on.
 */
void Event::record(const StreamID& stream) {
#ifdef CUDACC
    CUDA_CHECK(cudaEventRecord(mEvent, Manager::get()->stream(stream)));
#endif
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
////////////////////////
|
|
|
|
|
|
|
|
// GraphTools Methods //
|
|
|
|
|
|
|
|
////////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Releases everything the GraphTools collected: the host function holders and the
 * events created for branching.
 * The cleanup is no longer limited to CUDA builds, because makeBranch and joinBranch
 * allocate their Events in every build and they would otherwise leak on the CPU path.
 */
GraphTools::~GraphTools() {
    // The holders are stored type-erased, and using a delete-expression on a void* is
    // undefined. Return the storage through the global deallocation function instead.
    // NOTE(review): the concrete holder type is unknown here, so holder destructors are
    // not run -- acceptable only while the stored arguments are trivially destructible.
    for (void* holder : mHostData) {
        ::operator delete(holder);
    }
    for (Event* event : mEvents) {
        delete event;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void GraphTools::makeBranch(const StreamID& orig_stream, const StreamID& branch_stream) { |
|
|
|
|
|
|
|
Event* event = new Event(); |
|
|
|
|
|
|
|
event->record(orig_stream); |
|
|
|
|
|
|
|
mEvents.push_back(event); |
|
|
|
|
|
|
|
branch_stream.wait(*event); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void GraphTools::joinBranch(const StreamID& orig_stream, const StreamID& branch_stream) { |
|
|
|
|
|
|
|
Event* event = new Event(); |
|
|
|
|
|
|
|
event->record(branch_stream); |
|
|
|
|
|
|
|
mEvents.push_back(event); |
|
|
|
|
|
|
|
orig_stream.wait(*event); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#ifdef CUDACC |
|
|
|
#ifdef CUDACC |
|
|
|
const char* cublasGetErrorString(cublasStatus_t error) { |
|
|
|
const char* cublasGetErrorString(cublasStatus_t error) { |
|
|
|
switch (error) { |
|
|
|
switch (error) { |
|
|
@ -537,7 +721,6 @@ const char* cublasGetErrorString(cublasStatus_t error) { |
|
|
|
return "<unknown>"; |
|
|
|
return "<unknown>"; |
|
|
|
} |
|
|
|
} |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
}; // namespace CudaTools
|
|
|
|
}; // namespace CudaTools
|
|
|
|
#endif // CUDATOOLS_IMPLEMENTATION
|
|
|
|
#endif // CUDATOOLS_IMPLEMENTATION
|
|
|
|
|
|
|
|
|
|
|
|