From 2b6753cc7c42657468ac138a237aeabdda35557e Mon Sep 17 00:00:00 2001 From: Kenneth Jao Date: Fri, 21 Apr 2023 15:39:14 -0500 Subject: [PATCH] Corrected examples for Array --- Array.h | 26 ++++++++---- docs/source/usage.rst | 61 ++++++++++++++++++---------- samples/3_ArrayKernel/main.cu.cpp | 45 ++++++++++++-------- samples/4_ArrayFunctions/Makefile | 25 +----------- samples/4_ArrayFunctions/main.cu.cpp | 17 ++++---- 5 files changed, 95 insertions(+), 79 deletions(-) diff --git a/Array.h b/Array.h index 69b9a06..4efe07e 100644 --- a/Array.h +++ b/Array.h @@ -450,10 +450,7 @@ template class Array { HD Array reshaped(const Shape& new_shape) const { CT_ERROR_IF(shape().items(), !=, new_shape.items(), "New shape cannot have a different number of terms"); - if (mIsSlice) { - Array arr = this->copy(); - return arr.reshaped(new_shape); - } + CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)") Array arr = view(); arr.mShape = new_shape; return arr; @@ -462,7 +459,7 @@ template class Array { HD void reshape(const Shape& new_shape) { CT_ERROR_IF(shape().items(), !=, new_shape.items(), "New shape cannot have a different number of terms"); - CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try reshaped instead)") + CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)") mShape = new_shape; }; @@ -471,13 +468,26 @@ template class Array { * single vectors to their 2D counterparts. */ HD Array atLeast2D() const { - return (shape().axes() == 1) ? Array(*this, {shape().length(), 1}) : view(); + return (shape().axes() == 1) ? reshaped({shape().length(), 1}) : view(); }; /** - * Flattens the Array into one dimension. + * Reshapes this array, making it at least 2D. Useful for promoting + * single vectors to their 2D counterparts. 
+ */ + HD void asAtLeast2D() { + if (shape().axes() == 1) reshape({shape().length(), 1}); + }; + + /** + * Returns a view of this Array that has been flattened into one dimension. + */ + HD Array flattened() const { return reshaped({mShape.mItems}); }; + + /** + * Flattens this Array into one dimension. */ - HD Array flatten() const { return reshape({mShape.mItems}); }; + HD void flatten() { reshape({mShape.mItems}); }; /** * Returns the Eigen::Map of this Array. diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 13cb6cc..009bba5 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -136,38 +136,53 @@ We can demonstrate a few here. .. code-block:: cpp - DEFINE_KERNEL(times2, const CudaTools::Array& arr) { - BASIC_LOOP(arr.shape().items()) { - arr[iThread] *= 2; - } + DEFINE_KERNEL(times2, const CudaTools::Array arr) { + CudaTools::Array flat = arr.flattened(); + BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; } + } + + DEFINE_KERNEL(times2double, const CudaTools::Array arr) { + CudaTools::Array flat = arr.flattened(); + BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; } } int main() { CudaTools::Array arrRange = CudaTools::Array::range(0, 10); - CudaTools::Array arrConst = CudaTools::Array::constant(1); - CudaTools::Array arrLinspace = CudaTools::Array::linspace(0, 5, 10); + CudaTools::Array arrConst = CudaTools::Array::constant({10}, 1); + CudaTools::Array arrLinspace = CudaTools::Array::linspace(0, 5, 10); CudaTools::Array arrComma({2, 2}); // 2x2 array. - arrComma << 1, 2, 3, 4; // Comma initializer if needed. - std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n"; + arrComma << 1, 2, 3, 4; // Comma initializer if needed. 
+ + arrRange.updateDevice(); + arrConst.updateDevice(); + arrLinspace.updateDevice(); + arrComma.updateDevice().wait(); + + std::cout << "Before Kernel:\n"; + std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n"; // Call the kernel multiple times asynchronously. Note: since they share same // stream, they are not run in parallel, just queued on the device. - KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange); - KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange); - KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait(); - KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait(); + // NOTE: Notice that a view is passed into the kernel, not the Array itself. + KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view()); + KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view()); + KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view()); + KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait(); arrRange.updateHost(); arrConst.updateHost(); arrLinspace.updateHost(); - arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream. + arrComma.updateHost().wait(); // Same stream, so you should wait for the last call. - std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n"; + std::cout << "After Kernel:\n"; + std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n"; return 0; } In this example, we show a few ways to initialize an ``Array`` through some static functions. It is templated, so it can (theoretically) support any type. Additionally, you can initialize an -empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). 
For more details, +empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). Many of these +array functions and initializers have view-returning and self-assigning versions. For instance, +``.flattened()`` returns a flattened view of an Array, and does not modify the original. For more details, see :ref:`here >`. We also note the use of ``BASIC_LOOP(N)``, which is a macro for generating the loop automatically @@ -175,28 +190,32 @@ on the kernel given the number of threads. It is intended to be used only for "e situations and with the ``CudaTools::Kernel::basic()`` launch parameters. If compiling for CPU, it will mark the loop with ``#pragma parallel for`` and attempt to use OpenMP for parallelism. +.. warning:: + Notice that a view must be passed to the kernel, and not the original object. This is why each ``KERNEL`` call in the example above passes ``.view()`` of the array rather than the array itself. + The Array also supports other helpful functions, such as multi-dimensional indexing, slicing, and a few other functions. .. code-block:: cpp int main() { - CudaTools::Array arr = CudaTools::Array::constant(0); + CudaTools::Array arr = CudaTools::Array::constant({100}, 0); arr.reshape({4, 5, 5}); // Creates a three dimensional array. - arr[0][0][0] = 1; // Axis by axis indexing. + arr[0][0][0] = 1; // Axis by axis indexing. arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing. std::cout << arr << "\n"; - CudaTools::Array arrRange = CudaTools::Array::range(18); - auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center. + CudaTools::Array arrRange = CudaTools::Array::range(0, 18); + auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center. std::cout << "Before Copy:\n" << arrSlice << "\n"; arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!) std::cout << "After Copy:\n" << arrSlice << "\n"; - std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy. 
+ std::cout << "Modified: \n" + << arr << "\n"; // The original array is modified, since a slice does not copy. - CudaTools::Array newArr = arr.copy(); // Copies the original Array. + CudaTools::Array newArr = arr.copy(); // Copies the original Array. for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array. *it = 1; } diff --git a/samples/3_ArrayKernel/main.cu.cpp b/samples/3_ArrayKernel/main.cu.cpp index adc4ed0..05f5a3c 100644 --- a/samples/3_ArrayKernel/main.cu.cpp +++ b/samples/3_ArrayKernel/main.cu.cpp @@ -1,34 +1,45 @@ #define CUDATOOLS_IMPLEMENTATION -#include #include +#include + +DEFINE_KERNEL(times2, const CudaTools::Array arr) { + CudaTools::Array flat = arr.flattened(); + BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; } +} -DEFINE_KERNEL(times2, const CudaTools::Array& arr) { - BASIC_LOOP(arr.shape().items()) { - arr[iThread] *= 2; - } +DEFINE_KERNEL(times2double, const CudaTools::Array arr) { + CudaTools::Array flat = arr.flattened(); + BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; } } int main() { CudaTools::Array arrRange = CudaTools::Array::range(0, 10); - CudaTools::Array arrConst = CudaTools::Array::constant(1); - CudaTools::Array arrLinspace = CudaTools::Array::linspace(0, 5, 10); + CudaTools::Array arrConst = CudaTools::Array::constant({10}, 1); + CudaTools::Array arrLinspace = CudaTools::Array::linspace(0, 5, 10); CudaTools::Array arrComma({2, 2}); // 2x2 array. - arrComma << 1, 2, 3, 4; // Comma initializer if needed. - std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n"; + arrComma << 1, 2, 3, 4; // Comma initializer if needed. + + arrRange.updateDevice(); + arrConst.updateDevice(); + arrLinspace.updateDevice(); + arrComma.updateDevice().wait(); + + std::cout << "Before Kernel:\n"; + std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n"; // Call the kernel multiple times asynchronously. 
Note: since they share same // stream, they are not run in parallel, just queued on the device. - KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange); - KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange); - KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait(); - KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait(); + // NOTE: Notice that a view is passed into the kernel, not the Array itself. + KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view()); + KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view()); + KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view()); + KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait(); arrRange.updateHost(); arrConst.updateHost(); arrLinspace.updateHost(); - arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream. + arrComma.updateHost().wait(); // Same stream, so you should wait for the last call. - std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n"; + std::cout << "After Kernel:\n"; + std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n"; return 0; } - - diff --git a/samples/4_ArrayFunctions/Makefile b/samples/4_ArrayFunctions/Makefile index d0486ce..501a3ac 100644 --- a/samples/4_ArrayFunctions/Makefile +++ b/samples/4_ArrayFunctions/Makefile @@ -14,30 +14,7 @@ SRC_DIR = . BUILD_DIR = build # Should not need to modify below. -int main() { - CudaTools::Array arr = CudaTools::Array::constant(0); - arr.reshape({4, 5, 5}); // Creates a three dimensional array. - - arr[0][0][0] = 1; // Axis by axis indexing. - arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing. 
- std::cout << arr << "\n"; - - CudaTools::Array arrRange = CudaTools::Array::range(18); - auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center. - std::cout << "Before Copy:\n" << arrSlice << "\n"; - arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!) - std::cout << "After Copy:\n" << arrSlice << "\n"; - - std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy. - - CudaTools::Array newArr = arr.copy(); // Copies the original Array. - for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array. - *it = 1; - } - std::cout << "Modified New Array:\n" << newArr << "\n"; - std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy. - return 0; -} + CPU_BUILD_DIR = $(BUILD_DIR)/cpu GPU_BUILD_DIR = $(BUILD_DIR)/gpu diff --git a/samples/4_ArrayFunctions/main.cu.cpp b/samples/4_ArrayFunctions/main.cu.cpp index 3979f3c..806329c 100644 --- a/samples/4_ArrayFunctions/main.cu.cpp +++ b/samples/4_ArrayFunctions/main.cu.cpp @@ -1,24 +1,25 @@ #define CUDATOOLS_IMPLEMENTATION -#include #include +#include int main() { - CudaTools::Array arr = CudaTools::Array::constant(0); + CudaTools::Array arr = CudaTools::Array::constant({100}, 0); arr.reshape({4, 5, 5}); // Creates a three dimensional array. - arr[0][0][0] = 1; // Axis by axis indexing. + arr[0][0][0] = 1; // Axis by axis indexing. arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing. std::cout << arr << "\n"; - CudaTools::Array arrRange = CudaTools::Array::range(18); - auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center. + CudaTools::Array arrRange = CudaTools::Array::range(0, 18); + auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center. std::cout << "Before Copy:\n" << arrSlice << "\n"; arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!) 
std::cout << "After Copy:\n" << arrSlice << "\n"; - std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy. + std::cout << "Modified: \n" + << arr << "\n"; // The original array is modified, since a slice does not copy. - CudaTools::Array newArr = arr.copy(); // Copies the original Array. + CudaTools::Array newArr = arr.copy(); // Copies the original Array. for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array. *it = 1; } @@ -26,5 +27,3 @@ int main() { std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy. return 0; } - -