diff --git a/BLAS.h b/BLAS.h index 8369eb1..1164ce9 100644 --- a/BLAS.h +++ b/BLAS.h @@ -304,8 +304,8 @@ StreamID GEMV(const T alpha, const Array& A, const Array& x, const T beta, * Computes the matrix-matrix product: \f$ C = \alpha AB + \beta C \f$. It will automatically * broadcast the operation if applicable. */ -template -StreamID GEMM(const T alpha, const Array& A, const Array& B, const T beta, const Array& C, +template +StreamID GEMM(const T alpha, const Array& A, const Array& B, const T beta, const Array& C, const StreamID& stream = DEF_CUBLAS_STREAM) { BatchInfo bi = Check::isBroadcastable(A, B, C, "A", "B", "C"); diff --git a/Macros.h b/Macros.h index b0a5c99..ff826e7 100644 --- a/Macros.h +++ b/Macros.h @@ -67,7 +67,7 @@ #define BASIC_LOOP(N) /** - * \def DEVICE_CLASS(name) + * \def DEVICE_COPY(name) * Can be used inside a class declaration (header) which generates boilerplate code to allow this * class to be used on the device. * @@ -76,6 +76,8 @@ * * void allocateDevice(): allocates the memory on the device for this class instance. * + * void freeDevice(): frees the memory on the device for this class instance. + * * CudaTools::StreamID updateHost(const CudaTools::StreamID& stream): updates the host instance * of the class. * @@ -83,7 +85,7 @@ * the device instance of the class. 
* \param name the name of the class */ -#define DEVICE_CLASS(name) +#define DEVICE_COPY(name) /** * \def CT_ERROR_IF(a, op, b, msg) @@ -175,22 +177,24 @@ return CudaTools::copy(this, that(), sizeof(name)); \ } -#ifdef CUDA +#ifdef CUDACC -#define DEVICE_CLASS(name) \ +#define DEVICE_COPY(name) \ private: \ name* __deviceInstance__ = nullptr; \ \ public: \ - inline name* that() { return __deviceInstance__; } \ + inline name* that() { return __deviceInstance__; }; \ inline void allocateDevice() { __deviceInstance__ = (name*)CudaTools::malloc(sizeof(name)); }; \ + inline void freeDevice() { CudaTools::free(__deviceInstance__); }; \ UPDATE_FUNC(name) #else -#define DEVICE_CLASS(name) \ +#define DEVICE_COPY(name) \ public: \ inline name* that() { return this; }; \ inline void allocateDevice(){}; \ + inline void freeDevice(){}; \ UPDATE_FUNC(name) #endif diff --git a/docs/source/core.rst b/docs/source/core.rst index d06a78d..0ef435b 100644 --- a/docs/source/core.rst +++ b/docs/source/core.rst @@ -53,10 +53,10 @@ Device Helpers .. doxygendefine:: BASIC_LOOP -Device Class +Device Copy ------------ -.. doxygendefine:: DEVICE_CLASS +.. doxygendefine:: DEVICE_COPY Memory Functions diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 6833570..7a264ab 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -61,7 +61,7 @@ being device-compatible. We follow the previous example in a similar fashion. .. code-block:: cpp class intPair { - DEVICE_CLASS(intPair) + DEVICE_COPY(intPair) public: int x, y; @@ -92,11 +92,13 @@ being device-compatible. We follow the previous example in a similar fashion. return 0; } -In this example, we create a class called ``intPair``, which is then made available on the device through -the ``DEVICE_CLASS(name)`` macro. Specifically, that macro introduces a few functions, like -``allocateDevice()``, ``updateDevice()``, ``updateHost()``, and ``that()``. The ``that()`` function -returns a pointer to the copy on the device. 
As a result, the programmer **must** define a destructor -that frees the pointer using ``CudaTools::free(that)``. For more details, see :ref:`here `. +In this example, we create a class called ``intPair``, and enable device-copying functions through +the ``DEVICE_COPY(name)`` macro. This macro is not necessary for a class or struct to be available on the device, as we can always pass objects through the kernel function arguments. It is useful, however, for preventing constant copying and, potentially, for separating class copies between host and device. + +The aforementioned macro introduces a few functions, like +``allocateDevice()``, ``freeDevice()``, ``updateDevice()``, ``updateHost()``, and ``that()``. +The ``that()`` function returns a pointer to the copy on the device. As a result, when using this macro, the programmer +**must** define a destructor that frees the pointer using ``freeDevice()``. For more details, see :ref:`here `. .. warning:: The ``updateDevice()`` and ``updateHost()`` in most cases will need to be explicitly called diff --git a/samples/2_CoreClass/main.cu.cpp b/samples/2_CoreClass/main.cu.cpp index e1b0c10..af8d388 100644 --- a/samples/2_CoreClass/main.cu.cpp +++ b/samples/2_CoreClass/main.cu.cpp @@ -2,7 +2,7 @@ #include class intPair { - DEVICE_CLASS(intPair) + DEVICE_COPY(intPair) public: int x, y; diff --git a/tests.cu.cpp b/tests.cu.cpp index 46a75b4..e1d8974 100644 --- a/tests.cu.cpp +++ b/tests.cu.cpp @@ -88,7 +88,7 @@ CT::Shape makeRandom2DShape() { /////////// class TestClass { - DEVICE_CLASS(TestClass); + DEVICE_COPY(TestClass); public: int x;