Corrected examples for Array

main
Kenneth Jao 2 years ago
parent 39ad7c0955
commit 2b6753cc7c
  1. 26
      Array.h
  2. 61
      docs/source/usage.rst
  3. 45
      samples/3_ArrayKernel/main.cu.cpp
  4. 25
      samples/4_ArrayFunctions/Makefile
  5. 17
      samples/4_ArrayFunctions/main.cu.cpp

@ -450,10 +450,7 @@ template <typename T> class Array {
HD Array reshaped(const Shape& new_shape) const {
CT_ERROR_IF(shape().items(), !=, new_shape.items(),
"New shape cannot have a different number of terms");
if (mIsSlice) {
Array<T> arr = this->copy();
return arr.reshaped(new_shape);
}
CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
Array<T> arr = view();
arr.mShape = new_shape;
return arr;
@ -462,7 +459,7 @@ template <typename T> class Array {
HD void reshape(const Shape& new_shape) {
CT_ERROR_IF(shape().items(), !=, new_shape.items(),
"New shape cannot have a different number of terms");
CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try reshaped instead)")
CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
mShape = new_shape;
};
@ -471,13 +468,26 @@ template <typename T> class Array {
* single vectors to their 2D counterparts.
*/
HD Array atLeast2D() const {
return (shape().axes() == 1) ? Array(*this, {shape().length(), 1}) : view();
return (shape().axes() == 1) ? reshaped({shape().length(), 1}) : view();
};
/**
* Flattens the Array into one dimension.
* Reshapes this array, making it at least 2D. Useful for promoting
* single vectors to their 2D counterparts.
*/
HD void asAtLeast2D() {
if (shape().axes() == 1) reshape({shape().length(), 1});
};
/**
* Returns a view of this Array that has been flattened into one dimension.
*/
HD Array flattened() const { return reshaped({mShape.mItems}); };
/**
* Flattens this Array into one dimension.
*/
HD Array flatten() const { return reshape({mShape.mItems}); };
HD void flatten() { reshape({mShape.mItems}); };
/**
* Returns the Eigen::Map of this Array.

@ -136,38 +136,53 @@ We can demonstrate a few here.
.. code-block:: cpp
DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) {
BASIC_LOOP(arr.shape().items()) {
arr[iThread] *= 2;
}
DEFINE_KERNEL(times2, const CudaTools::Array<int> arr) {
CudaTools::Array<int> flat = arr.flattened();
BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
}
DEFINE_KERNEL(times2double, const CudaTools::Array<double> arr) {
CudaTools::Array<double> flat = arr.flattened();
BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
}
int main() {
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1);
CudaTools::Array<double> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10);
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant({10}, 1);
CudaTools::Array<double> arrLinspace = CudaTools::Array<double>::linspace(0, 5, 10);
CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
arrComma << 1, 2, 3, 4; // Comma initializer if needed.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
arrComma << 1, 2, 3, 4; // Comma initializer if needed.
arrRange.updateDevice();
arrConst.updateDevice();
arrLinspace.updateDevice();
arrComma.updateDevice().wait();
std::cout << "Before Kernel:\n";
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
// Call the kernel multiple times asynchronously. Note: since they share same
// stream, they are not run in parallel, just queued on the device.
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange);
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange);
KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait();
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait();
// NOTE: Notice that a view is passed into the kernel, not the Array itself.
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view());
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view());
KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view());
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait();
arrRange.updateHost();
arrConst.updateHost();
arrLinspace.updateHost();
arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream.
arrComma.updateHost().wait(); // Same stream, so you should wait for the last call.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
std::cout << "After Kernel:\n";
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
return 0;
}
In this example, we show a few ways to initialize an ``Array`` through some static functions.
It is templated, so it can (theoretically) support any type. Additionally, you can initialize an
empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). For more details,
empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). Many of these
array functions and initializers have view-returning and self-assigning versions. For instance,
``.flattened()`` returns a flattened view of an Array, and does not modify the original. For more details,
see :ref:`here <CudaTools::Array<T>>`.
We also note the use of ``BASIC_LOOP(N)``, which is a macro for generating the loop automatically
@ -175,28 +190,32 @@ on the kernel given the number of threads. It is intended to be used only for "e
situations and with the ``CudaTools::Kernel::basic()`` launch parameters. If compiling for CPU, it will
mark the loop with ``#pragma omp parallel for`` and attempt to use OpenMP for parallelism.
.. warning::
Notice that a view must be passed to the kernel, and not the original object. This
The Array also supports other helpful functions, such as multi-dimensional indexing, slicing, and
a few other functions.
.. code-block:: cpp
int main() {
CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
CudaTools::Array<int> arr = CudaTools::Array<int>::constant({100}, 0);
arr.reshape({4, 5, 5}); // Creates a three dimensional array.
arr[0][0][0] = 1; // Axis by axis indexing.
arr[0][0][0] = 1; // Axis by axis indexing.
arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
std::cout << arr << "\n";
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 18);
auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center.
std::cout << "Before Copy:\n" << arrSlice << "\n";
arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
std::cout << "After Copy:\n" << arrSlice << "\n";
std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
std::cout << "Modified: \n"
<< arr << "\n"; // The original array is modified, since a slice does not copy.
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
*it = 1;
}

@ -1,34 +1,45 @@
#define CUDATOOLS_IMPLEMENTATION
#include <Core.h>
#include <Array.h>
#include <Core.h>
DEFINE_KERNEL(times2, const CudaTools::Array<int> arr) {
CudaTools::Array<int> flat = arr.flattened();
BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
}
DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) {
BASIC_LOOP(arr.shape().items()) {
arr[iThread] *= 2;
}
DEFINE_KERNEL(times2double, const CudaTools::Array<double> arr) {
CudaTools::Array<double> flat = arr.flattened();
BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
}
int main() {
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1);
CudaTools::Array<double> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10);
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant({10}, 1);
CudaTools::Array<double> arrLinspace = CudaTools::Array<double>::linspace(0, 5, 10);
CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
arrComma << 1, 2, 3, 4; // Comma initializer if needed.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
arrComma << 1, 2, 3, 4; // Comma initializer if needed.
arrRange.updateDevice();
arrConst.updateDevice();
arrLinspace.updateDevice();
arrComma.updateDevice().wait();
std::cout << "Before Kernel:\n";
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
// Call the kernel multiple times asynchronously. Note: since they share same
// stream, they are not run in parallel, just queued on the device.
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange);
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange);
KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait();
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait();
// NOTE: Notice that a view is passed into the kernel, not the Array itself.
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view());
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view());
KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view());
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait();
arrRange.updateHost();
arrConst.updateHost();
arrLinspace.updateHost();
arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream.
arrComma.updateHost().wait(); // Same stream, so you should wait for the last call.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
std::cout << "After Kernel:\n";
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
return 0;
}

@ -14,30 +14,7 @@ SRC_DIR = .
BUILD_DIR = build
# Should not need to modify below.
int main() {
CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
arr.reshape({4, 5, 5}); // Creates a three dimensional array.
arr[0][0][0] = 1; // Axis by axis indexing.
arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
std::cout << arr << "\n";
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
std::cout << "Before Copy:\n" << arrSlice << "\n";
arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
std::cout << "After Copy:\n" << arrSlice << "\n";
std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
*it = 1;
}
std::cout << "Modified New Array:\n" << newArr << "\n";
std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy.
return 0;
}
CPU_BUILD_DIR = $(BUILD_DIR)/cpu
GPU_BUILD_DIR = $(BUILD_DIR)/gpu

@ -1,24 +1,25 @@
#define CUDATOOLS_IMPLEMENTATION
#include <Core.h>
#include <Array.h>
#include <Core.h>
int main() {
CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
CudaTools::Array<int> arr = CudaTools::Array<int>::constant({100}, 0);
arr.reshape({4, 5, 5}); // Creates a three dimensional array.
arr[0][0][0] = 1; // Axis by axis indexing.
arr[0][0][0] = 1; // Axis by axis indexing.
arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
std::cout << arr << "\n";
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 18);
auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center.
std::cout << "Before Copy:\n" << arrSlice << "\n";
arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
std::cout << "After Copy:\n" << arrSlice << "\n";
std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
std::cout << "Modified: \n"
<< arr << "\n"; // The original array is modified, since a slice does not copy.
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
*it = 1;
}
@ -26,5 +27,3 @@ int main() {
std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy.
return 0;
}

Loading…
Cancel
Save