You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							845 lines
						
					
					
						
							28 KiB
						
					
					
				
			
		
		
	
	
							845 lines
						
					
					
						
							28 KiB
						
					
					
				| #ifndef CUDATOOLS_ARRAY_H
 | |
| #define CUDATOOLS_ARRAY_H
 | |
| 
 | |
| #include "Core.h"
 | |
| #include "Macros.h"
 | |
| #include "Types.h"
 | |
| 
 | |
| #include <cmath>
 | |
| #include <complex>
 | |
| #include <cstdlib>
 | |
| #include <iomanip>
 | |
| #include <random>
 | |
| #include <type_traits>
 | |
| 
 | |
| #ifdef CUDATOOLS_USE_EIGEN
 | |
| #include <Eigen/Dense>
 | |
| #endif
 | |
| 
 | |
| #ifdef CUDATOOLS_USE_PYTHON
 | |
| #include <pybind11/numpy.h>
 | |
| #include <pybind11/pybind11.h>
 | |
| namespace py = pybind11;
 | |
| #endif
 | |
| 
 | |
| #ifdef DEVICE
 | |
| #define POINTER pDevice
 | |
| #else
 | |
| #define POINTER pHost
 | |
| #endif
 | |
| 
 | |
| using namespace CudaTools::Types;
 | |
| 
 | |
| namespace CudaTools {
 | |
| 
 | |
| #ifdef CUDATOOLS_USE_EIGEN
 | |
| template <typename T>
 | |
| using EigenMat = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;
 | |
| template <typename T> using EigenMapMat = Eigen::Map<EigenMat<T>>;
 | |
| template <typename T> using ConstEigenMapMat = Eigen::Map<const EigenMat<T>>;
 | |
| 
 | |
| template <typename T> struct EigenAdaptConst_S { typedef EigenMapMat<T> type; };
 | |
| template <typename T> struct EigenAdaptConst_S<const T> { typedef ConstEigenMapMat<T> type; };
 | |
| template <typename T> using EigenAdaptConst = typename EigenAdaptConst_S<T>::type;
 | |
| #endif
 | |
| 
 | |
| template <typename T> class Array;
 | |
| 
 | |
| struct Slice {
 | |
|     uint32_t first;
 | |
|     uint32_t second;
 | |
|     HD Slice(const std::initializer_list<uint32_t> i)
 | |
|         : first(*i.begin()), second(*(i.begin() + 1)) {}
 | |
| };
 | |
| 
 | |
| template <typename T> class ArrayIterator {
 | |
|   private:
 | |
|     template <typename U>
 | |
|     friend std::ostream& operator<<(std::ostream& out, const ArrayIterator<U>& it);
 | |
|     T* pData;
 | |
|     Shape mShape;
 | |
|     uint32_t mIndices[CUDATOOLS_ARRAY_MAX_AXES] = {0};
 | |
| 
 | |
|   public:
 | |
|     HD ArrayIterator(T* p, const Shape& shape) : pData(p), mShape(shape){};
 | |
| 
 | |
|     /**
 | |
|      * Moves the iterator to the next value.
 | |
|      */
 | |
|     HD void next() {
 | |
|         bool carry = false;
 | |
|         uint32_t offset = 0;
 | |
|         for (uint32_t iAxis = mShape.axes() - 1; iAxis < mShape.axes(); --iAxis) {
 | |
|             if (mIndices[iAxis] == mShape.dim(iAxis) - 1) {
 | |
|                 mIndices[iAxis] = 0;
 | |
|                 offset += mShape.stride(iAxis) * (mShape.dim(iAxis) - 1);
 | |
|                 carry = true;
 | |
|             } else {
 | |
|                 pData += mShape.stride(iAxis);
 | |
|                 mIndices[iAxis] += 1;
 | |
|                 carry = false;
 | |
|             }
 | |
| 
 | |
|             if (not carry) {
 | |
|                 pData -= offset;
 | |
|                 return;
 | |
|             }
 | |
|         }
 | |
|         pData += 1; // "Overflow" occured, so we reached end of array.
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Moves the iterator to the previous value.
 | |
|      */
 | |
|     HD void prev() {
 | |
|         bool carry = false;
 | |
|         uint32_t offset = 0;
 | |
|         for (uint32_t iAxis = mShape.axes() - 1; iAxis < mShape.axes(); --iAxis) {
 | |
|             if (mIndices[iAxis] == 0) {
 | |
|                 mIndices[iAxis] = mShape.dim(iAxis) - 1;
 | |
|                 offset += mShape.stride(iAxis) * (mShape.dim(iAxis) - 1);
 | |
|                 carry = true;
 | |
|             } else {
 | |
|                 pData -= mShape.stride(iAxis);
 | |
|                 mIndices[iAxis] += 1;
 | |
|                 carry = false;
 | |
|             }
 | |
|             if (not carry) {
 | |
|                 pData += offset;
 | |
|                 return;
 | |
|             }
 | |
|         }
 | |
|         pData -= 1;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Moves the iterator a specified value away.
 | |
|      * \param amount the amount to advance by
 | |
|      */
 | |
|     HD void advance(const int32_t amount) {
 | |
|         if (amount < 0) {
 | |
|             for (uint32_t i = 0; i < std::abs(amount); ++i) {
 | |
|                 prev();
 | |
|             }
 | |
|         } else {
 | |
|             for (uint32_t i = 0; i < std::abs(amount); ++i) {
 | |
|                 next();
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     HD void operator++() { next(); }; /**< Prefix increment operator. */
 | |
|     HD void operator--() { prev(); }; /**< Prefix decrement operator. */
 | |
| 
 | |
|     /**< Addition operator. */
 | |
|     HD ArrayIterator<T> operator+(const int32_t v) const {
 | |
|         ArrayIterator<T> it = *this;
 | |
|         it.advance(v);
 | |
|         return it;
 | |
|     };
 | |
| 
 | |
|     /** Subtraction operator.*/
 | |
|     HD ArrayIterator<T> operator-(const int32_t v) const {
 | |
|         ArrayIterator<T> it = *this;
 | |
|         it.advance(-v);
 | |
|         return it;
 | |
|     };
 | |
|     HD void operator+=(const int32_t v) { advance(v); };
 | |
|     HD void operator-=(const int32_t v) { advance(-v); };
 | |
| 
 | |
|     HD T& operator*() { return *pData; };             /**< Dereference operator. */
 | |
|     HD const T& operator*() const { return *pData; }; /**< Const dereference operator. */
 | |
| 
 | |
|     /**
 | |
|      * Equals operator.
 | |
|      */
 | |
|     HD bool operator==(const ArrayIterator<T>& it) { return pData == it.pData; }
 | |
| 
 | |
|     /**
 | |
|      * Not equals operator.
 | |
|      */
 | |
|     HD bool operator!=(const ArrayIterator<T>& it) { return pData != it.pData; }
 | |
| };
 | |
| 
 | |
| template <typename T> std::ostream& operator<<(std::ostream& out, const ArrayIterator<T>& it) {
 | |
|     return out << it.pData;
 | |
| }
 | |
| 
 | |
| template <typename T> class ArrayLoader {
 | |
|   private:
 | |
|     ArrayIterator<T> mIterator;
 | |
|     ArrayIterator<T> mIteratorEnd;
 | |
| 
 | |
|   public:
 | |
|     HD ArrayLoader(const ArrayIterator<T>& it, const ArrayIterator<T>& it_end)
 | |
|         : mIterator(it), mIteratorEnd(it_end){};
 | |
|     HD ArrayLoader &operator,(const T value) {
 | |
|         CT_ERROR_IF(mIterator, ==, mIteratorEnd, "Cannot assign more values than Array size");
 | |
|         *mIterator = value;
 | |
|         ++mIterator;
 | |
|         return *this;
 | |
|     }
 | |
| };
 | |
| 
 | |
| /**
 | |
|  * A container that holds a N-dimensional array, stored column major. To set the
 | |
|  * maximum N, there is a compiler macro CUDATOOLS_ARRAY_MAX_DIM whose default value is 4.
 | |
|  * It adapts to operations between host and device to ease memory management.
 | |
|  */
 | |
| template <typename T> class Array {
 | |
|   private:
 | |
|     template <typename U> friend std::ostream& operator<<(std::ostream&, const Array<U>&);
 | |
| 
 | |
|     Shape mShape;
 | |
|     T* pHost = nullptr;
 | |
|     T* pDevice = nullptr;
 | |
| 
 | |
|     bool mIsView = false;
 | |
|     bool mIsSlice = false;
 | |
| 
 | |
|     uint32_t mEndOffset = 0;
 | |
| 
 | |
|     HD void freeArrays() {
 | |
| #ifndef DEVICE
 | |
|         if (not mIsView) {
 | |
|             if (pDevice != nullptr) CudaTools::free(pDevice);
 | |
|             if (pHost != nullptr) delete[] pHost;
 | |
|         }
 | |
| #endif
 | |
|     };
 | |
| 
 | |
|     HD void calcEnd() {
 | |
|         uint32_t offset = 0;
 | |
|         for (uint32_t i = 0; i < shape().axes(); ++i) {
 | |
|             offset += (shape().dim(i) - 1) * shape().stride(i);
 | |
|         }
 | |
|         mEndOffset = offset + 1;
 | |
|     };
 | |
| 
 | |
|   public:
 | |
|     HD Array() = default;
 | |
| 
 | |
|     /**
 | |
|      * Constructor for an Array that creates an allocates an array with
 | |
|      * the specified Shape. Construction in this format is disabled on the device.
 | |
|      * \brief Host only
 | |
|      * \param shape the shape of the array
 | |
|      * \param noDevice whether to initialize the array on the device
 | |
|      */
 | |
|     Array(const Shape& shape, const bool noDevice = false) : mShape(shape), mIsView(false) {
 | |
|         pHost = new T[shape.items()];
 | |
|         calcEnd();
 | |
|         if (noDevice) return;
 | |
|         pDevice = reinterpret_cast<T*>(CudaTools::malloc(shape.items() * sizeof(T)));
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Constructor for an Array from an existing (preallocated) pointer.
 | |
|      * \param pointer the pointer to use
 | |
|      * \param shape the shape of the array
 | |
|      * \param noDevice whether to initialize the array on the device
 | |
|      */
 | |
|     HD Array(T* const pointer, const Shape& shape, const bool noDevice = false)
 | |
|         : mShape(shape), mIsView(true), mIsSlice(false) {
 | |
|         POINTER = pointer;
 | |
|         calcEnd();
 | |
| #ifndef DEVICE
 | |
|         if (noDevice) return;
 | |
|         pDevice = reinterpret_cast<T*>(CudaTools::malloc(shape.items() * sizeof(T)));
 | |
| #endif
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Constructor for making a Array view from another Array,
 | |
|      * given an offset and shape.
 | |
|      * \param arr the original Array
 | |
|      * \param shape the shape of the new array
 | |
|      * \param offset the index where to start the a view of the array
 | |
|      */
 | |
|     HD Array(const Array& arr, const Shape& shape, const uint32_t offset = 0)
 | |
|         : mShape(shape), pHost(arr.pHost), pDevice(arr.pDevice), mIsView(true),
 | |
|           mIsSlice(arr.mIsSlice) {
 | |
|         calcEnd();
 | |
|         if (pHost != nullptr) pHost += offset;
 | |
|         if (pDevice != nullptr) pDevice += offset;
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * The copy-constructor for a Array. If this is not a view, a deep copy
 | |
|      * of the data will be performed on both host and device. On the device, it is always
 | |
|      * treated like a view.
 | |
|      */
 | |
|     HD Array(const Array& arr) : mShape(arr.mShape), mIsView(arr.mIsView), mIsSlice(arr.mIsSlice) {
 | |
|         calcEnd();
 | |
|         if (mIsView) { // If the other array was a view (and now this one), just assign.
 | |
|             pHost = arr.pHost;
 | |
|             pDevice = arr.pDevice;
 | |
|             return;
 | |
|         }
 | |
| 
 | |
|         // Otherwise, we assume this is needs to own data.
 | |
|         pHost = new T[mShape.items()];
 | |
|         auto arr_it = arr.begin();
 | |
|         for (auto it = begin(); it != end(); ++it) {
 | |
|             *it = *arr_it;
 | |
|             ++arr_it;
 | |
|         }
 | |
| 
 | |
| #ifndef DEVICE
 | |
|         if (arr.pDevice != nullptr) {
 | |
|             pDevice = (T*)CudaTools::malloc(mShape.items() * sizeof(T));
 | |
|         }
 | |
| #endif
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      *  The move-constructor for a Array.
 | |
|      */
 | |
|     HD Array(Array&& arr)
 | |
|         : mShape(arr.mShape), pHost(arr.pHost), pDevice(arr.pDevice), mIsView(arr.mIsView),
 | |
|           mIsSlice(arr.mIsSlice) {
 | |
|         calcEnd();
 | |
|         // Make other object empty.
 | |
|         arr.pHost = nullptr;
 | |
|         arr.pDevice = nullptr;
 | |
|         arr.mIsView = true;
 | |
|     };
 | |
| 
 | |
|     HD ~Array() { freeArrays(); };
 | |
| 
 | |
|     /**
 | |
|      * The copy-assignment operator for a Array. If this is not a view,
 | |
|      * then the currently owned data will be freed, and a deep copy of the data will
 | |
|      * be performed on both host and device. On the device, it is always treated like a view.
 | |
|      */
 | |
|     HD Array& operator=(const Array& arr) {
 | |
|         if (this == &arr) return *this;
 | |
| 
 | |
|         if (mIsView) { // If this array is a view, we assign data from the right-hand side.
 | |
|             auto arr_it = arr.begin();
 | |
|             for (auto it = begin(); it != end() and arr_it != arr.end(); ++it) {
 | |
|                 *it = *arr_it;
 | |
|                 ++arr_it;
 | |
|             }
 | |
|             return *this;
 | |
|         }
 | |
| 
 | |
|         // Otherwise, it is implied to be object reassignment.
 | |
|         mShape = arr.mShape;
 | |
|         mIsView = arr.mIsView;
 | |
|         mIsSlice = arr.mIsSlice;
 | |
|         calcEnd();
 | |
| 
 | |
|         // Regardless if the right-hand side is a view, we create a new copy.
 | |
|         // In case that the right-hand side is a view of this array, we
 | |
|         // allocate memory to copy first. Keep in mind that the right-hand side
 | |
|         // array will then become undefined.
 | |
| 
 | |
|         // We can only do this on the host.
 | |
| #ifndef DEVICE
 | |
|         T* new_pDevice = nullptr;
 | |
|         if (pDevice != nullptr) {
 | |
|             new_pDevice = (T*)CudaTools::malloc(mShape.items() * sizeof(T));
 | |
|         }
 | |
| 
 | |
|         T* new_pHost = new T[mShape.items()];
 | |
|         memcpy(new_pHost, arr.pHost, mShape.items() * sizeof(T));
 | |
| 
 | |
|         freeArrays();
 | |
|         pHost = new_pHost;
 | |
|         pDevice = new_pDevice;
 | |
| #else
 | |
|         pHost = arr.pHost;
 | |
|         pDevice = arr.pDevice;
 | |
| #endif
 | |
|         return *this;
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * The move-assignment operator for a Array.
 | |
|      */
 | |
|     HD Array& operator=(Array&& arr) {
 | |
|         if (this == &arr) return *this;
 | |
| 
 | |
|         if (mIsView) { // If this array is a view, we assign data from the right-hand side.
 | |
|             auto arr_it = arr.begin();
 | |
|             for (auto it = begin(); it != end() and arr_it != arr.end(); ++it) {
 | |
|                 *it = *arr_it;
 | |
|                 ++arr_it;
 | |
|             }
 | |
|             return *this;
 | |
|         }
 | |
| 
 | |
|         CT_ERROR(arr.mIsView,
 | |
|                  "Cannot move-assign view to a non-view (owner). This would lead to undefined "
 | |
|                  "behavior.");
 | |
| 
 | |
|         // Otherwise, it is implied to be object reassignment.
 | |
|         freeArrays();
 | |
|         mShape = arr.mShape;
 | |
|         pHost = arr.pHost;
 | |
|         pDevice = arr.pDevice;
 | |
|         mIsView = arr.mIsView;
 | |
|         mIsSlice = arr.mIsSlice;
 | |
|         calcEnd();
 | |
| 
 | |
|         // Make other array empty.
 | |
|         arr.pHost = nullptr;
 | |
|         arr.pDevice = nullptr;
 | |
|         arr.mIsView = true;
 | |
|         return *this;
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Used for indexing the Array.
 | |
|      * \param index index of the first dimension
 | |
|      */
 | |
|     HD Array operator[](const uint32_t index) const {
 | |
|         CT_ERROR_IF(index, >=, shape().dim(0), "Index exceeds axis size");
 | |
|         return Array(*this, shape().subshape(1), index * shape().stride(0));
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Used for indexing the Array.
 | |
|      * \param indices a list of indices to index the Array
 | |
|      */
 | |
|     HD Array operator[](const std::initializer_list<uint32_t> indices) const {
 | |
|         CT_ERROR_IF(indices.size(), >, shape().axes(),
 | |
|                     "Number of indices cannot exceed number of axes");
 | |
|         auto it = indices.begin();
 | |
|         uint offset = 0;
 | |
|         for (uint32_t i = 0; i < indices.size(); ++i) {
 | |
|             uint32_t index = *it;
 | |
|             CT_ERROR_IF(index, >=, shape().dim(i), "Index exceeds axis size");
 | |
|             offset += index * shape().stride(i);
 | |
|             ++it;
 | |
|         }
 | |
|         return Array(*this, shape().subshape(indices.size()), offset);
 | |
|     };
 | |
| 
 | |
|     HD ArrayLoader<T> operator<<(const T value) {
 | |
|         auto it = begin();
 | |
|         *it = value;
 | |
|         ++it;
 | |
|         return ArrayLoader<T>(it, end());
 | |
|     };
 | |
| 
 | |
|     HD T operator=(const T& value) { return POINTER[0] = value; };
 | |
|     HD operator T&() { return POINTER[0]; };
 | |
|     HD operator const T&() const { return POINTER[0]; };
 | |
| 
 | |
|     /**
 | |
|      * Used to create slices of the Array.
 | |
|      * \param slices a list of slices to slice the Array
 | |
|      */
 | |
|     HD Array slice(const std::initializer_list<Slice> slices) const {
 | |
|         CT_ERROR_IF(slices.size(), >, shape().axes(),
 | |
|                     "Number of slices cannot exceed number of axes");
 | |
| 
 | |
|         uint offset = 0;
 | |
|         Shape new_shape = mShape;
 | |
|         auto it = slices.begin();
 | |
|         for (uint32_t i = 0; i < slices.size(); ++i) {
 | |
|             uint32_t from_index = it->first;
 | |
|             uint32_t to_index = it->second;
 | |
|             CT_ERROR_IF(from_index, >, to_index,
 | |
|                         "Slice start cannot be greater than than slice end");
 | |
|             CT_ERROR_IF(from_index, >=, shape().dim(i), "Slice start exceeds axis size");
 | |
|             CT_ERROR_IF(to_index - 1, >=, shape().dim(i), "Slice end exceeds axis size");
 | |
| 
 | |
|             offset += from_index * shape().stride(i);
 | |
|             new_shape.mAxisDim[i] = to_index - from_index;
 | |
|             ++it;
 | |
|         }
 | |
|         new_shape.mItems = 1;
 | |
|         for (uint32_t i = 0; i < shape().axes(); ++i) {
 | |
|             new_shape.mItems *= new_shape.dim(i);
 | |
|         }
 | |
| 
 | |
|         Array<T> arr(*this, new_shape, offset);
 | |
|         arr.mIsSlice = true;
 | |
|         return arr;
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Returns this Array with a different Shape. Its self assigning version is reshape.
 | |
|      * If this Array is a slice of another, then it will perform a deep copy, and return
 | |
|      * a new non-view array.
 | |
|      */
 | |
|     HD Array reshaped(const Shape& new_shape) const {
 | |
|         CT_ERROR_IF(shape().items(), !=, new_shape.items(),
 | |
|                     "New shape cannot have a different number of terms");
 | |
|         CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
 | |
|         Array<T> arr = view();
 | |
|         arr.mShape = new_shape;
 | |
|         return arr;
 | |
|     };
 | |
| 
 | |
|     HD void reshape(const Shape& new_shape) {
 | |
|         CT_ERROR_IF(shape().items(), !=, new_shape.items(),
 | |
|                     "New shape cannot have a different number of terms");
 | |
|         CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
 | |
|         mShape = new_shape;
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Gets a view that is has at least two dimensions. Useful for promoting
 | |
|      * single vectors to their 2D counterparts.
 | |
|      */
 | |
|     HD Array atLeast2D() const {
 | |
|         return (shape().axes() == 1) ? reshaped({shape().length(), 1}) : view();
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Reshapes this array, making it at least 2D. Useful for promoting
 | |
|      * single vectors to their 2D counterparts.
 | |
|      */
 | |
|     HD void asAtLeast2D() {
 | |
|         if (shape().axes() == 1) reshape({shape().length(), 1});
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Returns a view of this Array that has been flattened into one dimension.
 | |
|      */
 | |
|     HD Array flattened() const { return reshaped({mShape.mItems}); };
 | |
| 
 | |
|     /**
 | |
|      * Flattens this Array into one dimension.
 | |
|      */
 | |
|     HD void flatten() { reshape({mShape.mItems}); };
 | |
| 
 | |
| #ifdef CUDATOOLS_USE_EIGEN
 | |
|     /**
 | |
|      * Returns the Eigen::Map of this Array.
 | |
|      */
 | |
|     EigenAdaptConst<ComplexConversion<T>> eigenMap() const {
 | |
|         uint32_t total_dim = mShape.mAxes;
 | |
|         CT_ERROR(mIsSlice, "Mapping to an Eigen array cannot occur on slices")
 | |
|         CT_ERROR_IF(total_dim, !=, 2,
 | |
|                     "Mapping to an Eigen array can only occur on two-dimensional arrays");
 | |
|         return EigenAdaptConst<ComplexConversion<T>>((ComplexConversion<T>*)POINTER, mShape.rows(),
 | |
|                                                      mShape.cols());
 | |
|     };
 | |
| #endif
 | |
| 
 | |
|     /**
 | |
|      * Gets the Shape of the Array.
 | |
|      */
 | |
|     HD Shape shape() const { return mShape; };
 | |
| 
 | |
|     /**
 | |
|      * Gets the pointer to this array, depending on host or device.
 | |
|      */
 | |
|     HD ComplexConversion<T>* data() const { return (ComplexConversion<T>*)POINTER; };
 | |
| 
 | |
|     /**
 | |
|      * Returns the device pointer regardless of host or device.
 | |
|      */
 | |
|     HD T* dataDevice() const { return pDevice; };
 | |
| 
 | |
|     HD bool isView() const { return mIsView; };   /**< Gets whether this Array is a view. */
 | |
|     HD bool isSlice() const { return mIsSlice; }; /**< Gets whether this Array is a slice. */
 | |
| 
 | |
|     /**
 | |
|      * Gets a view of this Array.
 | |
|      */
 | |
|     HD Array view() const { return Array(*this, mShape); }
 | |
| 
 | |
|     /**
 | |
|      * Copies this Array and returns a new Array with the same memory.
 | |
|      */
 | |
|     Array copy() const {
 | |
|         Array<T> arr(mShape, (pDevice == nullptr));
 | |
| 
 | |
|         auto arr_it = arr.begin();
 | |
|         for (auto it = begin(); it != end(); ++it) {
 | |
|             *arr_it = *it;
 | |
|             ++arr_it;
 | |
|         }
 | |
| #ifndef DEVICE
 | |
|         if (pDevice != nullptr) {
 | |
|             CudaTools::copy(pDevice, arr.dataDevice(), mShape.items() * sizeof(T)).wait();
 | |
|         }
 | |
| #endif
 | |
|         return arr;
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Gets the iterator to the beginning of this Array.
 | |
|      */
 | |
|     HD ArrayIterator<T> begin() const { return ArrayIterator<T>(POINTER, mShape); };
 | |
| 
 | |
|     /**
 | |
|      * Gets the iterator to the end of this Array.
 | |
|      */
 | |
|     HD ArrayIterator<T> end() const { return ArrayIterator<T>(POINTER + mEndOffset, mShape); };
 | |
| 
 | |
|     /**
 | |
|      * Sets the values of the entire Array to a constant. This is restricted to numerical types.
 | |
|      */
 | |
|     HD void setConstant(const T value) const {
 | |
|         static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
 | |
|         for (auto it = begin(); it != end(); ++it) {
 | |
|             *it = value;
 | |
|         }
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Sets the Array values with uniform random values in a specified range. This is restricted to
 | |
|      * numerical types.
 | |
|      * \brief Host only
 | |
|      */
 | |
|     void setRandom(const T min, const T max) const {
 | |
|         static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
 | |
|         if constexpr (is_complex<T>) {
 | |
|             CT_ERROR_IF(max.real(), <, min.real(),
 | |
|                         "Upper bound of range cannot be larger than lower bound");
 | |
|             CT_ERROR_IF(max.imag(), <, min.imag(),
 | |
|                         "Upper bound of range cannot be larger than lower bound");
 | |
|         } else {
 | |
|             CT_ERROR_IF(max, <, min, "Upper bound of range cannot be larger than lower bound");
 | |
|         }
 | |
|         std::random_device rd;
 | |
|         std::mt19937 mt(rd());
 | |
|         if constexpr (is_int<T>) {
 | |
|             std::uniform_int_distribution<T> dist(min, max);
 | |
|             for (auto it = begin(); it != end(); ++it) {
 | |
|                 *it = dist(mt);
 | |
|             }
 | |
|         } else if constexpr (is_float<T>) {
 | |
|             std::uniform_real_distribution<T> dist(min, max);
 | |
|             for (auto it = begin(); it != end(); ++it) {
 | |
|                 *it = dist(mt);
 | |
|             }
 | |
|         } else if constexpr (is_complex<T>) {
 | |
|             std::uniform_real_distribution<ComplexUnderlying<T>> distr(min.real(), max.real());
 | |
|             std::uniform_real_distribution<ComplexUnderlying<T>> disti(min.imag(), max.imag());
 | |
|             for (auto it = begin(); it != end(); ++it) {
 | |
|                 *it = T(distr(mt), disti(mt));
 | |
|             }
 | |
|         }
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Sets the Array values to start from a value and increment by a specified step. This is
 | |
|      * restricted to numerical types.
 | |
|      */
 | |
|     HD void setRange(T min, const T step = 1) const {
 | |
|         static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
 | |
|         for (auto it = begin(); it != end(); ++it) {
 | |
|             *it = min;
 | |
|             min += step;
 | |
|         }
 | |
|     }
 | |
|     /**
 | |
|      * Sets the Array values to be evenly spaced numbers over a given interval. This is restricted
 | |
|      * to floating point types.
 | |
|      */
 | |
|     HD void setLinspace(const T min, const T max) const {
 | |
|         static_assert(is_float<T>, "Function only available on numeric floating types.");
 | |
|         CT_ERROR_IF(max, <, min, "Upper bound of range cannot be larger than lower bound");
 | |
|         T i = 0;
 | |
|         T d = max - min;
 | |
|         T items = (T)(shape().items() - 1);
 | |
|         for (auto it = begin(); it != end(); ++it) {
 | |
|             *it = min + d * (i / items);
 | |
|             i += 1;
 | |
|         }
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Returns array of given shape with constant values. This is restricted to numerical types.
 | |
|      * \brief Host only
 | |
|      */
 | |
|     static Array constant(const Shape& shape, const T value) {
 | |
|         static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
 | |
|         Array<T> arr(shape);
 | |
|         arr.setConstant(value);
 | |
|         return arr;
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Returns array of given shape with random values in given interval. This is restricted to
 | |
|      * numerical types.
 | |
|      * \brief Host only
 | |
|      */
 | |
|     static Array random(const Shape& shape, const T min, const T max) {
 | |
|         static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
 | |
|         Array<T> arr(shape);
 | |
|         arr.setRandom(min, max);
 | |
|         return arr;
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Returns evenly spaced values within a given interval. This is restricted to numerical types.
 | |
|      * \brief Host only
 | |
|      */
 | |
|     static Array range(const T min, const T max, const T step = 1) {
 | |
|         static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
 | |
|         CT_ERROR_IF(max, <, min, "Upper bound of range cannot be larger than lower bound");
 | |
|         Array<T> arr({(uint32_t)((max - min) / step)});
 | |
|         arr.setRange(min, step);
 | |
|         return arr;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Returns evenly spaced values within a given interval. This is restricted to floating point
 | |
|      * types.
 | |
|      * \brief Host only
 | |
|      */
 | |
|     static Array linspace(const T min, const T max, const uint32_t size) {
 | |
|         static_assert(is_float<T>, "Function only available on numeric floating types.");
 | |
|         Array<T> arr({size});
 | |
|         arr.setLinspace(min, max);
 | |
|         return arr;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Transposes the internal data and returns the corresponding new Array.
 | |
|      * Its self assigning version is transpose. This is restricted to numerical types.
 | |
|      * \brief Host only
 | |
|      */
 | |
| 
 | |
| #ifdef CUDATOOLS_USE_EIGEN
 | |
|     Array transposed() const {
 | |
|         static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
 | |
|         CT_ERROR_IF(shape().axes(), !=, 2, "Tranpose can only occur on two-dimensional arrays");
 | |
|         Array<T> new_arr({mShape.rows(), mShape.cols()});
 | |
|         new_arr.eigenMap() = this->eigenMap().transpose().eval();
 | |
|         return new_arr;
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Transposes the intenal data. Its self assigning version is transpose.
 | |
|      * This is restricted to numerical types.
 | |
|      * \brief Host only
 | |
|      */
 | |
|     void transpose() {
 | |
|         static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
 | |
|         CT_ERROR_IF(shape().axes(), !=, 2, "Tranpose can only occur on two-dimensional arrays");
 | |
|         Array<T> new_arr(*this, {mShape.cols(), mShape.rows()});
 | |
|         new_arr.eigenMap() = this->eigenMap().transpose().eval();
 | |
|         mShape = Shape({mShape.cols(), mShape.rows()});
 | |
|     };
 | |
| 
 | |
|     void inverse() const {
 | |
|         static_assert(is_float<T>, "Function only available on floating numeric types.");
 | |
|         CT_ERROR_IF(shape().axes(), !=, 2, "Inverse can only occur on two-dimensional arrays");
 | |
|         CT_ERROR_IF(shape().rows(), !=, shape().cols(),
 | |
|                     "Inverse can only occur on square matrices");
 | |
|         Array<T> inv(shape());
 | |
|         inv.eigenMap() = this->eigenMap().inverse();
 | |
|     };
 | |
| #endif
 | |
| 
 | |
|     /**
 | |
|      * Pins the memory (page locks) for faster memory transfer in concurrent
 | |
|      * transfers.
 | |
|      * \brief Host only
 | |
|      */
 | |
|     void pinMemory() const { CudaTools::pin(pHost, mShape.items() * sizeof(T)); };
 | |
| 
 | |
|     /**
 | |
|      * Updates the host copy by copying the device data back to the host.
 | |
|      * \brief Host only
 | |
|      */
 | |
|     StreamID updateHost(const StreamID& stream = DEF_MEM_STREAM) const {
 | |
|         CT_ERROR(mIsSlice, "Cannot update host copy on a slice");
 | |
|         return CudaTools::copy(pDevice, pHost, mShape.items() * sizeof(T), stream);
 | |
|     };
 | |
| 
 | |
|     /**
 | |
|      * Updates the device copy by copying the host data to the device.
 | |
|      * \brief Host only
 | |
|      */
 | |
|     StreamID updateDevice(const StreamID& stream = DEF_MEM_STREAM) const {
 | |
|         CT_ERROR(mIsSlice, "Cannot update device copy on a slice");
 | |
|         return CudaTools::copy(pHost, pDevice, mShape.items() * sizeof(T), stream);
 | |
|     };
 | |
| 
 | |
| #ifdef CUDATOOLS_USE_PYTHON
 | |
|     /**
 | |
|      * Returns a py::array for making an Array available as a Python numpy array.
 | |
|      */
 | |
|     py::array pyArray() const {
 | |
|         std::vector<py::ssize_t> dims, strides;
 | |
|         for (uint iAxis = 0; iAxis < mShape.axes(); ++iAxis) {
 | |
|             dims.push_back(static_cast<py::ssize_t>(mShape.dim(iAxis)));
 | |
|             strides.push_back(sizeof(T) * static_cast<py::ssize_t>(mShape.stride(iAxis)));
 | |
|         }
 | |
|         return py::array_t<T, py::array::f_style>(
 | |
|             py::buffer_info((void*)pHost, sizeof(T), py::format_descriptor<T>::format(),
 | |
|                             static_cast<py::ssize_t>(mShape.axes()), dims, strides));
 | |
|     };
 | |
| #endif
 | |
| };
 | |
| 
 | |
| template <typename T>
 | |
| void printAxis(std::ostream& out, const Array<T>& arr, const uint32_t axis, size_t width) {
 | |
|     std::string space = std::string(2 * axis, ' ');
 | |
|     if (arr.shape().axes() == 1) {
 | |
|         out << "[";
 | |
|         for (uint32_t i = 0; i < arr.shape().items(); ++i) {
 | |
|             if constexpr (std::is_floating_point<T>::value) {
 | |
|                 out << std::scientific << std::setprecision(6);
 | |
|             }
 | |
|             if (width == 0) {
 | |
|                 out << ((i == 0) ? "" : " ");
 | |
|             } else {
 | |
|                 out << std::setw((i == 0) ? width - 1 : width);
 | |
|             }
 | |
|             out << static_cast<T>(arr[i]) << ((i == arr.shape().items() - 1) ? "]" : ",");
 | |
|         }
 | |
|     } else if (arr.shape().axes() == 2) {
 | |
|         for (uint32_t i = 0; i < arr.shape().dim(0); ++i) {
 | |
|             out << space << ((i == 0) ? "[" : " ");
 | |
|             printAxis(out, arr[i], axis + 1, width);
 | |
|             out << ((i == arr.shape().dim(0) - 1) ? "]" : ",\n");
 | |
|         }
 | |
|     } else {
 | |
|         out << space << "[\n";
 | |
|         for (uint32_t i = 0; i < arr.shape().dim(0); ++i) {
 | |
|             printAxis(out, arr[i], axis + 1, width);
 | |
|             out << ((i == arr.shape().dim(0) - 1) ? "\n" : ",\n\n");
 | |
|         }
 | |
|         out << space << "]";
 | |
|     }
 | |
| }
 | |
| 
 | |
| template <typename T> std::ostream& operator<<(std::ostream& out, const Array<T>& arr) {
 | |
|     size_t width = 0;
 | |
|     if constexpr (is_int<T>) {
 | |
|         T max_val = 0;
 | |
|         bool negative = false;
 | |
|         for (auto it = arr.begin(); it != arr.end(); ++it) {
 | |
|             T val = *it;
 | |
|             if constexpr (not std::is_unsigned<T>::value) {
 | |
|                 if (*it < 0) {
 | |
|                     negative = true;
 | |
|                     val *= -1;
 | |
|                 }
 | |
|             }
 | |
|             max_val = (val > max_val) ? val : max_val;
 | |
|         }
 | |
|         width = std::to_string(max_val).size() + 1;
 | |
|         width += (negative) ? 1 : 0;
 | |
|     } else if constexpr (is_float<T>) {
 | |
|         T max_val = 0;
 | |
|         bool negative = false;
 | |
|         for (auto it = arr.begin(); it != arr.end(); ++it) {
 | |
|             if (*it < 0) negative = true;
 | |
|             int exp = 0;
 | |
|             frexp(*it, &exp);
 | |
|             max_val = (exp > max_val) ? exp : max_val;
 | |
|         }
 | |
|         width = std::to_string(max_val).size() + 5;
 | |
|         width += (negative) ? 1 : 0;
 | |
|     }
 | |
| 
 | |
|     printAxis<T>(out, arr, 0, (arr.shape().axes() == 1) ? 0 : width);
 | |
|     return out;
 | |
| }
 | |
| 
 | |
| }; // namespace CudaTools
 | |
| 
 | |
| #endif // ARRAY_H
 | |
| 
 |