Corrected examples for Array

main
Kenneth Jao 2 years ago
parent 39ad7c0955
commit 2b6753cc7c
  1. 26
      Array.h
  2. 61
      docs/source/usage.rst
  3. 45
      samples/3_ArrayKernel/main.cu.cpp
  4. 25
      samples/4_ArrayFunctions/Makefile
  5. 17
      samples/4_ArrayFunctions/main.cu.cpp

@ -450,10 +450,7 @@ template <typename T> class Array {
HD Array reshaped(const Shape& new_shape) const { HD Array reshaped(const Shape& new_shape) const {
CT_ERROR_IF(shape().items(), !=, new_shape.items(), CT_ERROR_IF(shape().items(), !=, new_shape.items(),
"New shape cannot have a different number of terms"); "New shape cannot have a different number of terms");
if (mIsSlice) { CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
Array<T> arr = this->copy();
return arr.reshaped(new_shape);
}
Array<T> arr = view(); Array<T> arr = view();
arr.mShape = new_shape; arr.mShape = new_shape;
return arr; return arr;
@ -462,7 +459,7 @@ template <typename T> class Array {
HD void reshape(const Shape& new_shape) { HD void reshape(const Shape& new_shape) {
CT_ERROR_IF(shape().items(), !=, new_shape.items(), CT_ERROR_IF(shape().items(), !=, new_shape.items(),
"New shape cannot have a different number of terms"); "New shape cannot have a different number of terms");
CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try reshaped instead)") CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
mShape = new_shape; mShape = new_shape;
}; };
@ -471,13 +468,26 @@ template <typename T> class Array {
* single vectors to their 2D counterparts. * single vectors to their 2D counterparts.
*/ */
HD Array atLeast2D() const { HD Array atLeast2D() const {
return (shape().axes() == 1) ? Array(*this, {shape().length(), 1}) : view(); return (shape().axes() == 1) ? reshaped({shape().length(), 1}) : view();
}; };
/** /**
* Flattens the Array into one dimension. * Reshapes this array, making it at least 2D. Useful for promoting
* single vectors to their 2D counterparts.
*/
HD void asAtLeast2D() {
if (shape().axes() == 1) reshape({shape().length(), 1});
};
/**
* Returns a view of this Array that has been flattened into one dimension.
*/
HD Array flattened() const { return reshaped({mShape.mItems}); };
/**
* Flattens this Array into one dimension.
*/ */
HD Array flatten() const { return reshape({mShape.mItems}); }; HD void flatten() { reshape({mShape.mItems}); };
/** /**
* Returns the Eigen::Map of this Array. * Returns the Eigen::Map of this Array.

@ -136,38 +136,53 @@ We can demonstrate a few here.
.. code-block:: cpp .. code-block:: cpp
DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) { DEFINE_KERNEL(times2, const CudaTools::Array<int> arr) {
BASIC_LOOP(arr.shape().items()) { CudaTools::Array<int> flat = arr.flattened();
arr[iThread] *= 2; BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
} }
DEFINE_KERNEL(times2double, const CudaTools::Array<double> arr) {
CudaTools::Array<double> flat = arr.flattened();
BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
} }
int main() { int main() {
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10); CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1); CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant({10}, 1);
CudaTools::Array<double> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10); CudaTools::Array<double> arrLinspace = CudaTools::Array<double>::linspace(0, 5, 10);
CudaTools::Array<int> arrComma({2, 2}); // 2x2 array. CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
arrComma << 1, 2, 3, 4; // Comma initializer if needed. arrComma << 1, 2, 3, 4; // Comma initializer if needed.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
arrRange.updateDevice();
arrConst.updateDevice();
arrLinspace.updateDevice();
arrComma.updateDevice().wait();
std::cout << "Before Kernel:\n";
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
// Call the kernel multiple times asynchronously. Note: since they share the same // Call the kernel multiple times asynchronously. Note: since they share the same
// stream, they are not run in parallel, just queued on the device. // stream, they are not run in parallel, just queued on the device.
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange); // NOTE: Notice that a view is passed into the kernel, not the Array itself.
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange); KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view());
KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait(); KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view());
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait(); KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view());
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait();
arrRange.updateHost(); arrRange.updateHost();
arrConst.updateHost(); arrConst.updateHost();
arrLinspace.updateHost(); arrLinspace.updateHost();
arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream. arrComma.updateHost().wait(); // Same stream, so you should wait for the last call.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n"; std::cout << "After Kernel:\n";
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
return 0; return 0;
} }
In this example, we show a few ways to initialize an ``Array`` through some static functions. In this example, we show a few ways to initialize an ``Array`` through some static functions.
It is templated, so it can (theoretically) support any type. Additionally, you can initialize an It is templated, so it can (theoretically) support any type. Additionally, you can initialize an
empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). For more details, empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). Many of these
array functions and initializers have view-returning and self-assigning versions. For instance,
``.flattened()`` returns a flattened view of an Array, and does not modify the original. For more details,
see :ref:`here <CudaTools::Array<T>>`. see :ref:`here <CudaTools::Array<T>>`.
We also note the use of ``BASIC_LOOP(N)``, which is a macro for generating the loop automatically We also note the use of ``BASIC_LOOP(N)``, which is a macro for generating the loop automatically
@ -175,28 +190,32 @@ on the kernel given the number of threads. It is intended to be used only for "e
situations and with the ``CudaTools::Kernel::basic()`` launch parameters. If compiling for CPU, it will situations and with the ``CudaTools::Kernel::basic()`` launch parameters. If compiling for CPU, it will
mark the loop with ``#pragma omp parallel for`` and attempt to use OpenMP for parallelism. mark the loop with ``#pragma omp parallel for`` and attempt to use OpenMP for parallelism.
.. warning::
Notice that a view must be passed to the kernel, and not the original object.
The Array also supports other helpful functions, such as multi-dimensional indexing, slicing, and The Array also supports other helpful functions, such as multi-dimensional indexing, slicing, and
a few other functions. a few other functions.
.. code-block:: cpp .. code-block:: cpp
int main() { int main() {
CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0); CudaTools::Array<int> arr = CudaTools::Array<int>::constant({100}, 0);
arr.reshape({4, 5, 5}); // Creates a three dimensional array. arr.reshape({4, 5, 5}); // Creates a three dimensional array.
arr[0][0][0] = 1; // Axis by axis indexing. arr[0][0][0] = 1; // Axis by axis indexing.
arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing. arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
std::cout << arr << "\n"; std::cout << arr << "\n";
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18); CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 18);
auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center. auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center.
std::cout << "Before Copy:\n" << arrSlice << "\n"; std::cout << "Before Copy:\n" << arrSlice << "\n";
arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!) arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
std::cout << "After Copy:\n" << arrSlice << "\n"; std::cout << "After Copy:\n" << arrSlice << "\n";
std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy. std::cout << "Modified: \n"
<< arr << "\n"; // The original array is modified, since a slice does not copy.
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array. CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array. for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
*it = 1; *it = 1;
} }

@ -1,34 +1,45 @@
#define CUDATOOLS_IMPLEMENTATION #define CUDATOOLS_IMPLEMENTATION
#include <Core.h>
#include <Array.h> #include <Array.h>
#include <Core.h>
DEFINE_KERNEL(times2, const CudaTools::Array<int> arr) {
CudaTools::Array<int> flat = arr.flattened();
BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
}
DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) { DEFINE_KERNEL(times2double, const CudaTools::Array<double> arr) {
BASIC_LOOP(arr.shape().items()) { CudaTools::Array<double> flat = arr.flattened();
arr[iThread] *= 2; BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
}
} }
int main() { int main() {
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10); CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1); CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant({10}, 1);
CudaTools::Array<double> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10); CudaTools::Array<double> arrLinspace = CudaTools::Array<double>::linspace(0, 5, 10);
CudaTools::Array<int> arrComma({2, 2}); // 2x2 array. CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
arrComma << 1, 2, 3, 4; // Comma initializer if needed. arrComma << 1, 2, 3, 4; // Comma initializer if needed.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
arrRange.updateDevice();
arrConst.updateDevice();
arrLinspace.updateDevice();
arrComma.updateDevice().wait();
std::cout << "Before Kernel:\n";
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
// Call the kernel multiple times asynchronously. Note: since they share the same // Call the kernel multiple times asynchronously. Note: since they share the same
// stream, they are not run in parallel, just queued on the device. // stream, they are not run in parallel, just queued on the device.
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange); // NOTE: Notice that a view is passed into the kernel, not the Array itself.
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange); KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view());
KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait(); KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view());
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait(); KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view());
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait();
arrRange.updateHost(); arrRange.updateHost();
arrConst.updateHost(); arrConst.updateHost();
arrLinspace.updateHost(); arrLinspace.updateHost();
arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream. arrComma.updateHost().wait(); // Same stream, so you should wait for the last call.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n"; std::cout << "After Kernel:\n";
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
return 0; return 0;
} }

@ -14,30 +14,7 @@ SRC_DIR = .
BUILD_DIR = build BUILD_DIR = build
# Should not need to modify below. # Should not need to modify below.
int main() {
CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
arr.reshape({4, 5, 5}); // Creates a three dimensional array.
arr[0][0][0] = 1; // Axis by axis indexing.
arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
std::cout << arr << "\n";
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
std::cout << "Before Copy:\n" << arrSlice << "\n";
arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
std::cout << "After Copy:\n" << arrSlice << "\n";
std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
*it = 1;
}
std::cout << "Modified New Array:\n" << newArr << "\n";
std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy.
return 0;
}
CPU_BUILD_DIR = $(BUILD_DIR)/cpu CPU_BUILD_DIR = $(BUILD_DIR)/cpu
GPU_BUILD_DIR = $(BUILD_DIR)/gpu GPU_BUILD_DIR = $(BUILD_DIR)/gpu

@ -1,24 +1,25 @@
#define CUDATOOLS_IMPLEMENTATION #define CUDATOOLS_IMPLEMENTATION
#include <Core.h>
#include <Array.h> #include <Array.h>
#include <Core.h>
int main() { int main() {
CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0); CudaTools::Array<int> arr = CudaTools::Array<int>::constant({100}, 0);
arr.reshape({4, 5, 5}); // Creates a three dimensional array. arr.reshape({4, 5, 5}); // Creates a three dimensional array.
arr[0][0][0] = 1; // Axis by axis indexing. arr[0][0][0] = 1; // Axis by axis indexing.
arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing. arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
std::cout << arr << "\n"; std::cout << arr << "\n";
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18); CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 18);
auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center. auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center.
std::cout << "Before Copy:\n" << arrSlice << "\n"; std::cout << "Before Copy:\n" << arrSlice << "\n";
arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!) arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
std::cout << "After Copy:\n" << arrSlice << "\n"; std::cout << "After Copy:\n" << arrSlice << "\n";
std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy. std::cout << "Modified: \n"
<< arr << "\n"; // The original array is modified, since a slice does not copy.
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array. CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array. for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
*it = 1; *it = 1;
} }
@ -26,5 +27,3 @@ int main() {
std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy. std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy.
return 0; return 0;
} }

Loading…
Cancel
Save