#ifndef CUDATOOLS_ARRAY_H #define CUDATOOLS_ARRAY_H #include "Core.h" #include "Macros.h" #include "Types.h" #include #include #include #include #include #include #ifdef CUDATOOLS_USE_EIGEN #include #endif #ifdef CUDATOOLS_USE_PYTHON #include #include namespace py = pybind11; #endif #ifdef DEVICE #define POINTER pDevice #else #define POINTER pHost #endif using namespace CudaTools::Types; namespace CudaTools { #ifdef CUDATOOLS_USE_EIGEN template using EigenMat = Eigen::Matrix; template using EigenMapMat = Eigen::Map>; template using ConstEigenMapMat = Eigen::Map>; template struct EigenAdaptConst_S { typedef EigenMapMat type; }; template struct EigenAdaptConst_S { typedef ConstEigenMapMat type; }; template using EigenAdaptConst = typename EigenAdaptConst_S::type; #endif template class Array; struct Slice { uint32_t first; uint32_t second; HD Slice(const std::initializer_list i) : first(*i.begin()), second(*(i.begin() + 1)) {} }; template class ArrayIterator { private: template friend std::ostream& operator<<(std::ostream& out, const ArrayIterator& it); T* pData; Shape mShape; uint32_t mIndices[CUDATOOLS_ARRAY_MAX_AXES] = {0}; public: HD ArrayIterator(T* p, const Shape& shape) : pData(p), mShape(shape){}; /** * Moves the iterator to the next value. */ HD void next() { bool carry = false; uint32_t offset = 0; for (uint32_t iAxis = mShape.axes() - 1; iAxis < mShape.axes(); --iAxis) { if (mIndices[iAxis] == mShape.dim(iAxis) - 1) { mIndices[iAxis] = 0; offset += mShape.stride(iAxis) * (mShape.dim(iAxis) - 1); carry = true; } else { pData += mShape.stride(iAxis); mIndices[iAxis] += 1; carry = false; } if (not carry) { pData -= offset; return; } } pData += 1; // "Overflow" occured, so we reached end of array. } /** * Moves the iterator to the previous value. */ HD void prev() { bool carry = false; uint32_t offset = 0; for (uint32_t iAxis = mShape.axes() - 1; iAxis < mShape.axes(); --iAxis) { if (mIndices[iAxis] == 0) { mIndices[iAxis] = mShape.dim(iAxis) - 1; offset += mShape.stride(iAxis) * (mShape.dim(iAxis) - 1); carry = true; } else { pData -= mShape.stride(iAxis); mIndices[iAxis] += 1; carry = false; } if (not carry) { pData += offset; return; } } pData -= 1; } /** * Moves the iterator a specified value away. * \param amount the amount to advance by */ HD void advance(const int32_t amount) { if (amount < 0) { for (uint32_t i = 0; i < std::abs(amount); ++i) { prev(); } } else { for (uint32_t i = 0; i < std::abs(amount); ++i) { next(); } } } HD void operator++() { next(); }; /**< Prefix increment operator. */ HD void operator--() { prev(); }; /**< Prefix decrement operator. */ /**< Addition operator. */ HD ArrayIterator operator+(const int32_t v) const { ArrayIterator it = *this; it.advance(v); return it; }; /** Subtraction operator.*/ HD ArrayIterator operator-(const int32_t v) const { ArrayIterator it = *this; it.advance(-v); return it; }; HD void operator+=(const int32_t v) { advance(v); }; HD void operator-=(const int32_t v) { advance(-v); }; HD T& operator*() { return *pData; }; /**< Dereference operator. */ HD const T& operator*() const { return *pData; }; /**< Const dereference operator. */ /** * Equals operator. */ HD bool operator==(const ArrayIterator& it) { return pData == it.pData; } /** * Not equals operator. */ HD bool operator!=(const ArrayIterator& it) { return pData != it.pData; } }; template std::ostream& operator<<(std::ostream& out, const ArrayIterator& it) { return out << it.pData; } template class ArrayLoader { private: ArrayIterator mIterator; ArrayIterator mIteratorEnd; public: HD ArrayLoader(const ArrayIterator& it, const ArrayIterator& it_end) : mIterator(it), mIteratorEnd(it_end){}; HD ArrayLoader &operator,(const T value) { CT_ERROR_IF(mIterator, ==, mIteratorEnd, "Cannot assign more values than Array size"); *mIterator = value; ++mIterator; return *this; } }; /** * A container that holds a N-dimensional array, stored column major. To set the * maximum N, there is a compiler macro CUDATOOLS_ARRAY_MAX_DIM whose default value is 4. * It adapts to operations between host and device to ease memory management. */ template class Array { private: template friend std::ostream& operator<<(std::ostream&, const Array&); Shape mShape; T* pHost = nullptr; T* pDevice = nullptr; bool mIsView = false; bool mIsSlice = false; uint32_t mEndOffset = 0; HD void freeArrays() { #ifndef DEVICE if (not mIsView) { if (pDevice != nullptr) CudaTools::free(pDevice); if (pHost != nullptr) delete[] pHost; } #endif }; HD void calcEnd() { uint32_t offset = 0; for (uint32_t i = 0; i < shape().axes(); ++i) { offset += (shape().dim(i) - 1) * shape().stride(i); } mEndOffset = offset + 1; }; public: HD Array() = default; /** * Constructor for an Array that creates an allocates an array with * the specified Shape. Construction in this format is disabled on the device. * \brief Host only * \param shape the shape of the array * \param noDevice whether to initialize the array on the device */ Array(const Shape& shape, const bool noDevice = false) : mShape(shape), mIsView(false) { pHost = new T[shape.items()]; calcEnd(); if (noDevice) return; pDevice = reinterpret_cast(CudaTools::malloc(shape.items() * sizeof(T))); }; /** * Constructor for an Array from an existing (preallocated) pointer. * \param pointer the pointer to use * \param shape the shape of the array * \param noDevice whether to initialize the array on the device */ HD Array(T* const pointer, const Shape& shape, const bool noDevice = false) : mShape(shape), mIsView(true), mIsSlice(false) { POINTER = pointer; calcEnd(); #ifndef DEVICE if (noDevice) return; pDevice = reinterpret_cast(CudaTools::malloc(shape.items() * sizeof(T))); #endif }; /** * Constructor for making a Array view from another Array, * given an offset and shape. * \param arr the original Array * \param shape the shape of the new array * \param offset the index where to start the a view of the array */ HD Array(const Array& arr, const Shape& shape, const uint32_t offset = 0) : mShape(shape), pHost(arr.pHost), pDevice(arr.pDevice), mIsView(true), mIsSlice(arr.mIsSlice) { calcEnd(); if (pHost != nullptr) pHost += offset; if (pDevice != nullptr) pDevice += offset; }; /** * The copy-constructor for a Array. If this is not a view, a deep copy * of the data will be performed on both host and device. On the device, it is always * treated like a view. */ HD Array(const Array& arr) : mShape(arr.mShape), mIsView(arr.mIsView), mIsSlice(arr.mIsSlice) { calcEnd(); if (mIsView) { // If the other array was a view (and now this one), just assign. pHost = arr.pHost; pDevice = arr.pDevice; return; } // Otherwise, we assume this is needs to own data. pHost = new T[mShape.items()]; auto arr_it = arr.begin(); for (auto it = begin(); it != end(); ++it) { *it = *arr_it; ++arr_it; } #ifndef DEVICE if (arr.pDevice != nullptr) { pDevice = (T*)CudaTools::malloc(mShape.items() * sizeof(T)); } #endif }; /** * The move-constructor for a Array. */ HD Array(Array&& arr) : mShape(arr.mShape), pHost(arr.pHost), pDevice(arr.pDevice), mIsView(arr.mIsView), mIsSlice(arr.mIsSlice) { calcEnd(); // Make other object empty. arr.pHost = nullptr; arr.pDevice = nullptr; arr.mIsView = true; }; HD ~Array() { freeArrays(); }; /** * The copy-assignment operator for a Array. If this is not a view, * then the currently owned data will be freed, and a deep copy of the data will * be performed on both host and device. On the device, it is always treated like a view. */ HD Array& operator=(const Array& arr) { if (this == &arr) return *this; if (mIsView) { // If this array is a view, we assign data from the right-hand side. auto arr_it = arr.begin(); for (auto it = begin(); it != end() and arr_it != arr.end(); ++it) { *it = *arr_it; ++arr_it; } return *this; } // Otherwise, it is implied to be object reassignment. mShape = arr.mShape; mIsView = arr.mIsView; mIsSlice = arr.mIsSlice; calcEnd(); // Regardless if the right-hand side is a view, we create a new copy. // In case that the right-hand side is a view of this array, we // allocate memory to copy first. Keep in mind that the right-hand side // array will then become undefined. // We can only do this on the host. #ifndef DEVICE T* new_pDevice = nullptr; if (pDevice != nullptr) { new_pDevice = (T*)CudaTools::malloc(mShape.items() * sizeof(T)); } T* new_pHost = new T[mShape.items()]; memcpy(new_pHost, arr.pHost, mShape.items() * sizeof(T)); freeArrays(); pHost = new_pHost; pDevice = new_pDevice; #else pHost = arr.pHost; pDevice = arr.pDevice; #endif return *this; }; /** * The move-assignment operator for a Array. */ HD Array& operator=(Array&& arr) { if (this == &arr) return *this; if (mIsView) { // If this array is a view, we assign data from the right-hand side. auto arr_it = arr.begin(); for (auto it = begin(); it != end() and arr_it != arr.end(); ++it) { *it = *arr_it; ++arr_it; } return *this; } CT_ERROR(arr.mIsView, "Cannot move-assign view to a non-view (owner). This would lead to undefined " "behavior."); // Otherwise, it is implied to be object reassignment. freeArrays(); mShape = arr.mShape; pHost = arr.pHost; pDevice = arr.pDevice; mIsView = arr.mIsView; mIsSlice = arr.mIsSlice; calcEnd(); // Make other array empty. arr.pHost = nullptr; arr.pDevice = nullptr; arr.mIsView = true; return *this; }; /** * Used for indexing the Array. * \param index index of the first dimension */ HD Array operator[](const uint32_t index) const { CT_ERROR_IF(index, >=, shape().dim(0), "Index exceeds axis size"); return Array(*this, shape().subshape(1), index * shape().stride(0)); }; /** * Used for indexing the Array. * \param indices a list of indices to index the Array */ HD Array operator[](const std::initializer_list indices) const { CT_ERROR_IF(indices.size(), >, shape().axes(), "Number of indices cannot exceed number of axes"); auto it = indices.begin(); uint offset = 0; for (uint32_t i = 0; i < indices.size(); ++i) { uint32_t index = *it; CT_ERROR_IF(index, >=, shape().dim(i), "Index exceeds axis size"); offset += index * shape().stride(i); ++it; } return Array(*this, shape().subshape(indices.size()), offset); }; HD ArrayLoader operator<<(const T value) { auto it = begin(); *it = value; ++it; return ArrayLoader(it, end()); }; HD T operator=(const T& value) { return POINTER[0] = value; }; HD operator T&() { return POINTER[0]; }; HD operator const T&() const { return POINTER[0]; }; /** * Used to create slices of the Array. * \param slices a list of slices to slice the Array */ HD Array slice(const std::initializer_list slices) const { CT_ERROR_IF(slices.size(), >, shape().axes(), "Number of slices cannot exceed number of axes"); uint offset = 0; Shape new_shape = mShape; auto it = slices.begin(); for (uint32_t i = 0; i < slices.size(); ++i) { uint32_t from_index = it->first; uint32_t to_index = it->second; CT_ERROR_IF(from_index, >, to_index, "Slice start cannot be greater than than slice end"); CT_ERROR_IF(from_index, >=, shape().dim(i), "Slice start exceeds axis size"); CT_ERROR_IF(to_index - 1, >=, shape().dim(i), "Slice end exceeds axis size"); offset += from_index * shape().stride(i); new_shape.mAxisDim[i] = to_index - from_index; ++it; } new_shape.mItems = 1; for (uint32_t i = 0; i < shape().axes(); ++i) { new_shape.mItems *= new_shape.dim(i); } Array arr(*this, new_shape, offset); arr.mIsSlice = true; return arr; }; /** * Returns this Array with a different Shape. Its self assigning version is reshape. * If this Array is a slice of another, then it will perform a deep copy, and return * a new non-view array. */ HD Array reshaped(const Shape& new_shape) const { CT_ERROR_IF(shape().items(), !=, new_shape.items(), "New shape cannot have a different number of terms"); CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)") Array arr = view(); arr.mShape = new_shape; return arr; }; HD void reshape(const Shape& new_shape) { CT_ERROR_IF(shape().items(), !=, new_shape.items(), "New shape cannot have a different number of terms"); CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)") mShape = new_shape; }; /** * Gets a view that is has at least two dimensions. Useful for promoting * single vectors to their 2D counterparts. */ HD Array atLeast2D() const { return (shape().axes() == 1) ? reshaped({shape().length(), 1}) : view(); }; /** * Reshapes this array, making it at least 2D. Useful for promoting * single vectors to their 2D counterparts. */ HD void asAtLeast2D() { if (shape().axes() == 1) reshape({shape().length(), 1}); }; /** * Returns a view of this Array that has been flattened into one dimension. */ HD Array flattened() const { return reshaped({mShape.mItems}); }; /** * Flattens this Array into one dimension. */ HD void flatten() { reshape({mShape.mItems}); }; #ifdef CUDATOOLS_USE_EIGEN /** * Returns the Eigen::Map of this Array. */ EigenAdaptConst> eigenMap() const { uint32_t total_dim = mShape.mAxes; CT_ERROR(mIsSlice, "Mapping to an Eigen array cannot occur on slices") CT_ERROR_IF(total_dim, !=, 2, "Mapping to an Eigen array can only occur on two-dimensional arrays"); return EigenAdaptConst>((ComplexConversion*)POINTER, mShape.rows(), mShape.cols()); }; #endif /** * Gets the Shape of the Array. */ HD Shape shape() const { return mShape; }; /** * Gets the pointer to this array, depending on host or device. */ HD ComplexConversion* data() const { return (ComplexConversion*)POINTER; }; /** * Returns the device pointer regardless of host or device. */ HD T* dataDevice() const { return pDevice; }; HD bool isView() const { return mIsView; }; /**< Gets whether this Array is a view. */ HD bool isSlice() const { return mIsSlice; }; /**< Gets whether this Array is a slice. */ /** * Gets a view of this Array. */ HD Array view() const { return Array(*this, mShape); } /** * Copies this Array and returns a new Array with the same memory. */ Array copy() const { Array arr(mShape, (pDevice == nullptr)); auto arr_it = arr.begin(); for (auto it = begin(); it != end(); ++it) { *arr_it = *it; ++arr_it; } #ifndef DEVICE if (pDevice != nullptr) { CudaTools::copy(pDevice, arr.dataDevice(), mShape.items() * sizeof(T)).wait(); } #endif return arr; }; /** * Gets the iterator to the beginning of this Array. */ HD ArrayIterator begin() const { return ArrayIterator(POINTER, mShape); }; /** * Gets the iterator to the end of this Array. */ HD ArrayIterator end() const { return ArrayIterator(POINTER + mEndOffset, mShape); }; /** * Sets the values of the entire Array to a constant. This is restricted to numerical types. */ HD void setConstant(const T value) const { static_assert(is_host_num, "Function only available on host-compatible numeric types."); for (auto it = begin(); it != end(); ++it) { *it = value; } }; /** * Sets the Array values with uniform random values in a specified range. This is restricted to * numerical types. * \brief Host only */ void setRandom(const T min, const T max) const { static_assert(is_host_num, "Function only available on host-compatible numeric types."); if constexpr (is_complex) { CT_ERROR_IF(max.real(), <, min.real(), "Upper bound of range cannot be larger than lower bound"); CT_ERROR_IF(max.imag(), <, min.imag(), "Upper bound of range cannot be larger than lower bound"); } else { CT_ERROR_IF(max, <, min, "Upper bound of range cannot be larger than lower bound"); } std::random_device rd; std::mt19937 mt(rd()); if constexpr (is_int) { std::uniform_int_distribution dist(min, max); for (auto it = begin(); it != end(); ++it) { *it = dist(mt); } } else if constexpr (is_float) { std::uniform_real_distribution dist(min, max); for (auto it = begin(); it != end(); ++it) { *it = dist(mt); } } else if constexpr (is_complex) { std::uniform_real_distribution> distr(min.real(), max.real()); std::uniform_real_distribution> disti(min.imag(), max.imag()); for (auto it = begin(); it != end(); ++it) { *it = T(distr(mt), disti(mt)); } } }; /** * Sets the Array values to start from a value and increment by a specified step. This is * restricted to numerical types. */ HD void setRange(T min, const T step = 1) const { static_assert(is_host_num, "Function only available on host-compatible numeric types."); for (auto it = begin(); it != end(); ++it) { *it = min; min += step; } } /** * Sets the Array values to be evenly spaced numbers over a given interval. This is restricted * to floating point types. */ HD void setLinspace(const T min, const T max) const { static_assert(is_float, "Function only available on numeric floating types."); CT_ERROR_IF(max, <, min, "Upper bound of range cannot be larger than lower bound"); T i = 0; T d = max - min; T items = (T)(shape().items() - 1); for (auto it = begin(); it != end(); ++it) { *it = min + d * (i / items); i += 1; } }; /** * Returns array of given shape with constant values. This is restricted to numerical types. * \brief Host only */ static Array constant(const Shape& shape, const T value) { static_assert(is_host_num, "Function only available on host-compatible numeric types."); Array arr(shape); arr.setConstant(value); return arr; }; /** * Returns array of given shape with random values in given interval. This is restricted to * numerical types. * \brief Host only */ static Array random(const Shape& shape, const T min, const T max) { static_assert(is_host_num, "Function only available on host-compatible numeric types."); Array arr(shape); arr.setRandom(min, max); return arr; }; /** * Returns evenly spaced values within a given interval. This is restricted to numerical types. * \brief Host only */ static Array range(const T min, const T max, const T step = 1) { static_assert(is_host_num, "Function only available on host-compatible numeric types."); CT_ERROR_IF(max, <, min, "Upper bound of range cannot be larger than lower bound"); Array arr({(uint32_t)((max - min) / step)}); arr.setRange(min, step); return arr; } /** * Returns evenly spaced values within a given interval. This is restricted to floating point * types. * \brief Host only */ static Array linspace(const T min, const T max, const uint32_t size) { static_assert(is_float, "Function only available on numeric floating types."); Array arr({size}); arr.setLinspace(min, max); return arr; } /** * Transposes the internal data and returns the corresponding new Array. * Its self assigning version is transpose. This is restricted to numerical types. * \brief Host only */ Array transposed() const { static_assert(is_host_num, "Function only available on host-compatible numeric types."); CT_ERROR_IF(shape().axes(), !=, 2, "Tranpose can only occur on two-dimensional arrays"); Array new_arr({mShape.rows(), mShape.cols()}); new_arr.eigenMap() = this->eigenMap().transpose().eval(); return new_arr; }; /** * Transposes the intenal data. Its self assigning version is transpose. * This is restricted to numerical types. * \brief Host only */ void transpose() { static_assert(is_host_num, "Function only available on host-compatible numeric types."); CT_ERROR_IF(shape().axes(), !=, 2, "Tranpose can only occur on two-dimensional arrays"); Array new_arr(*this, {mShape.cols(), mShape.rows()}); new_arr.eigenMap() = this->eigenMap().transpose().eval(); mShape = Shape({mShape.cols(), mShape.rows()}); }; void inverse() const { static_assert(is_float, "Function only available on floating numeric types."); CT_ERROR_IF(shape().axes(), !=, 2, "Inverse can only occur on two-dimensional arrays"); CT_ERROR_IF(shape().rows(), !=, shape().cols(), "Inverse can only occur on square matrices"); Array inv(shape()); inv.eigenMap() = this->eigenMap().inverse(); }; /** * Pins the memory (page locks) for faster memory transfer in concurrent * transfers. * \brief Host only */ void pinMemory() const { CudaTools::pin(pHost, mShape.items() * sizeof(T)); }; /** * Updates the host copy by copying the device data back to the host. * \brief Host only */ StreamID updateHost(const StreamID& stream = DEF_MEM_STREAM) const { CT_ERROR(mIsSlice, "Cannot update host copy on a slice"); return CudaTools::copy(pDevice, pHost, mShape.items() * sizeof(T), stream); }; /** * Updates the device copy by copying the host data to the device. * \brief Host only */ StreamID updateDevice(const StreamID& stream = DEF_MEM_STREAM) const { CT_ERROR(mIsSlice, "Cannot update device copy on a slice"); return CudaTools::copy(pHost, pDevice, mShape.items() * sizeof(T), stream); }; #ifdef CUDATOOLS_USE_PYTHON /** * Returns a py::array for making an Array available as a Python numpy array. */ py::array pyArray() const { std::vector dims, strides; for (uint iAxis = 0; iAxis < mShape.axes(); ++iAxis) { dims.push_back(static_cast(mShape.dim(iAxis))); strides.push_back(sizeof(T) * static_cast(mShape.stride(iAxis))); } return py::array_t( py::buffer_info((void*)pHost, sizeof(T), py::format_descriptor::format(), static_cast(mShape.axes()), dims, strides)); }; #endif }; template void printAxis(std::ostream& out, const Array& arr, const uint32_t axis, size_t width) { std::string space = std::string(2 * axis, ' '); if (arr.shape().axes() == 1) { out << "["; for (uint32_t i = 0; i < arr.shape().items(); ++i) { if constexpr (std::is_floating_point::value) { out << std::scientific << std::setprecision(6); } if (width == 0) { out << ((i == 0) ? "" : " "); } else { out << std::setw((i == 0) ? width - 1 : width); } out << static_cast(arr[i]) << ((i == arr.shape().items() - 1) ? "]" : ","); } } else if (arr.shape().axes() == 2) { for (uint32_t i = 0; i < arr.shape().dim(0); ++i) { out << space << ((i == 0) ? "[" : " "); printAxis(out, arr[i], axis + 1, width); out << ((i == arr.shape().dim(0) - 1) ? "]" : ",\n"); } } else { out << space << "[\n"; for (uint32_t i = 0; i < arr.shape().dim(0); ++i) { printAxis(out, arr[i], axis + 1, width); out << ((i == arr.shape().dim(0) - 1) ? "\n" : ",\n\n"); } out << space << "]"; } } template std::ostream& operator<<(std::ostream& out, const Array& arr) { size_t width = 0; if constexpr (is_int) { T max_val = 0; bool negative = false; for (auto it = arr.begin(); it != arr.end(); ++it) { T val = *it; if constexpr (not std::is_unsigned::value) { if (*it < 0) { negative = true; val *= -1; } } max_val = (val > max_val) ? val : max_val; } width = std::to_string(max_val).size() + 1; width += (negative) ? 1 : 0; } else if constexpr (is_float) { T max_val = 0; bool negative = false; for (auto it = arr.begin(); it != arr.end(); ++it) { if (*it < 0) negative = true; int exp = 0; frexp(*it, &exp); max_val = (exp > max_val) ? exp : max_val; } width = std::to_string(max_val).size() + 5; width += (negative) ? 1 : 0; } printAxis(out, arr, 0, (arr.shape().axes() == 1) ? 0 : width); return out; } }; // namespace CudaTools #endif // ARRAY_H