A library and framework for developing CPU-CUDA compatible applications under one unified code.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

845 lines
28 KiB

#ifndef CUDATOOLS_ARRAY_H
#define CUDATOOLS_ARRAY_H
#include "Core.h"
#include "Macros.h"
#include "Types.h"
#include <cmath>
#include <complex>
#include <cstdlib>
#include <iomanip>
#include <random>
#include <type_traits>
#ifdef CUDATOOLS_USE_EIGEN
#include <Eigen/Dense>
#endif
#ifdef CUDATOOLS_USE_PYTHON
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
namespace py = pybind11;
#endif
#ifdef DEVICE
#define POINTER pDevice
#else
#define POINTER pHost
#endif
using namespace CudaTools::Types;
namespace CudaTools {
#ifdef CUDATOOLS_USE_EIGEN
template <typename T>
using EigenMat = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;
template <typename T> using EigenMapMat = Eigen::Map<EigenMat<T>>;
template <typename T> using ConstEigenMapMat = Eigen::Map<const EigenMat<T>>;
template <typename T> struct EigenAdaptConst_S { typedef EigenMapMat<T> type; };
template <typename T> struct EigenAdaptConst_S<const T> { typedef ConstEigenMapMat<T> type; };
template <typename T> using EigenAdaptConst = typename EigenAdaptConst_S<T>::type;
#endif
template <typename T> class Array;
struct Slice {
uint32_t first;
uint32_t second;
HD Slice(const std::initializer_list<uint32_t> i)
: first(*i.begin()), second(*(i.begin() + 1)) {}
};
template <typename T> class ArrayIterator {
private:
template <typename U>
friend std::ostream& operator<<(std::ostream& out, const ArrayIterator<U>& it);
T* pData;
Shape mShape;
uint32_t mIndices[CUDATOOLS_ARRAY_MAX_AXES] = {0};
public:
HD ArrayIterator(T* p, const Shape& shape) : pData(p), mShape(shape){};
/**
* Moves the iterator to the next value.
*/
HD void next() {
bool carry = false;
uint32_t offset = 0;
for (uint32_t iAxis = mShape.axes() - 1; iAxis < mShape.axes(); --iAxis) {
if (mIndices[iAxis] == mShape.dim(iAxis) - 1) {
mIndices[iAxis] = 0;
offset += mShape.stride(iAxis) * (mShape.dim(iAxis) - 1);
carry = true;
} else {
pData += mShape.stride(iAxis);
mIndices[iAxis] += 1;
carry = false;
}
if (not carry) {
pData -= offset;
return;
}
}
pData += 1; // "Overflow" occured, so we reached end of array.
}
/**
* Moves the iterator to the previous value.
*/
HD void prev() {
bool carry = false;
uint32_t offset = 0;
for (uint32_t iAxis = mShape.axes() - 1; iAxis < mShape.axes(); --iAxis) {
if (mIndices[iAxis] == 0) {
mIndices[iAxis] = mShape.dim(iAxis) - 1;
offset += mShape.stride(iAxis) * (mShape.dim(iAxis) - 1);
carry = true;
} else {
pData -= mShape.stride(iAxis);
mIndices[iAxis] += 1;
carry = false;
}
if (not carry) {
pData += offset;
return;
}
}
pData -= 1;
}
/**
* Moves the iterator a specified value away.
* \param amount the amount to advance by
*/
HD void advance(const int32_t amount) {
if (amount < 0) {
for (uint32_t i = 0; i < std::abs(amount); ++i) {
prev();
}
} else {
for (uint32_t i = 0; i < std::abs(amount); ++i) {
next();
}
}
}
HD void operator++() { next(); }; /**< Prefix increment operator. */
HD void operator--() { prev(); }; /**< Prefix decrement operator. */
/**< Addition operator. */
HD ArrayIterator<T> operator+(const int32_t v) const {
ArrayIterator<T> it = *this;
it.advance(v);
return it;
};
/** Subtraction operator.*/
HD ArrayIterator<T> operator-(const int32_t v) const {
ArrayIterator<T> it = *this;
it.advance(-v);
return it;
};
HD void operator+=(const int32_t v) { advance(v); };
HD void operator-=(const int32_t v) { advance(-v); };
HD T& operator*() { return *pData; }; /**< Dereference operator. */
HD const T& operator*() const { return *pData; }; /**< Const dereference operator. */
/**
* Equals operator.
*/
HD bool operator==(const ArrayIterator<T>& it) { return pData == it.pData; }
/**
* Not equals operator.
*/
HD bool operator!=(const ArrayIterator<T>& it) { return pData != it.pData; }
};
template <typename T> std::ostream& operator<<(std::ostream& out, const ArrayIterator<T>& it) {
return out << it.pData;
}
template <typename T> class ArrayLoader {
private:
ArrayIterator<T> mIterator;
ArrayIterator<T> mIteratorEnd;
public:
HD ArrayLoader(const ArrayIterator<T>& it, const ArrayIterator<T>& it_end)
: mIterator(it), mIteratorEnd(it_end){};
HD ArrayLoader &operator,(const T value) {
CT_ERROR_IF(mIterator, ==, mIteratorEnd, "Cannot assign more values than Array size");
*mIterator = value;
++mIterator;
return *this;
}
};
/**
* A container that holds a N-dimensional array, stored column major. To set the
* maximum N, there is a compiler macro CUDATOOLS_ARRAY_MAX_DIM whose default value is 4.
* It adapts to operations between host and device to ease memory management.
*/
template <typename T> class Array {
private:
template <typename U> friend std::ostream& operator<<(std::ostream&, const Array<U>&);
Shape mShape;
T* pHost = nullptr;
T* pDevice = nullptr;
bool mIsView = false;
bool mIsSlice = false;
uint32_t mEndOffset = 0;
HD void freeArrays() {
#ifndef DEVICE
if (not mIsView) {
if (pDevice != nullptr) CudaTools::free(pDevice);
if (pHost != nullptr) delete[] pHost;
}
#endif
};
HD void calcEnd() {
uint32_t offset = 0;
for (uint32_t i = 0; i < shape().axes(); ++i) {
offset += (shape().dim(i) - 1) * shape().stride(i);
}
mEndOffset = offset + 1;
};
public:
HD Array() = default;
/**
* Constructor for an Array that creates an allocates an array with
* the specified Shape. Construction in this format is disabled on the device.
* \brief Host only
* \param shape the shape of the array
* \param noDevice whether to initialize the array on the device
*/
Array(const Shape& shape, const bool noDevice = false) : mShape(shape), mIsView(false) {
pHost = new T[shape.items()];
calcEnd();
if (noDevice) return;
pDevice = reinterpret_cast<T*>(CudaTools::malloc(shape.items() * sizeof(T)));
};
/**
* Constructor for an Array from an existing (preallocated) pointer.
* \param pointer the pointer to use
* \param shape the shape of the array
* \param noDevice whether to initialize the array on the device
*/
HD Array(T* const pointer, const Shape& shape, const bool noDevice = false)
: mShape(shape), mIsView(true), mIsSlice(false) {
POINTER = pointer;
calcEnd();
#ifndef DEVICE
if (noDevice) return;
pDevice = reinterpret_cast<T*>(CudaTools::malloc(shape.items() * sizeof(T)));
#endif
};
/**
* Constructor for making a Array view from another Array,
* given an offset and shape.
* \param arr the original Array
* \param shape the shape of the new array
* \param offset the index where to start the a view of the array
*/
HD Array(const Array& arr, const Shape& shape, const uint32_t offset = 0)
: mShape(shape), pHost(arr.pHost), pDevice(arr.pDevice), mIsView(true),
mIsSlice(arr.mIsSlice) {
calcEnd();
if (pHost != nullptr) pHost += offset;
if (pDevice != nullptr) pDevice += offset;
};
/**
* The copy-constructor for a Array. If this is not a view, a deep copy
* of the data will be performed on both host and device. On the device, it is always
* treated like a view.
*/
HD Array(const Array& arr) : mShape(arr.mShape), mIsView(arr.mIsView), mIsSlice(arr.mIsSlice) {
calcEnd();
if (mIsView) { // If the other array was a view (and now this one), just assign.
pHost = arr.pHost;
pDevice = arr.pDevice;
return;
}
// Otherwise, we assume this is needs to own data.
pHost = new T[mShape.items()];
auto arr_it = arr.begin();
for (auto it = begin(); it != end(); ++it) {
*it = *arr_it;
++arr_it;
}
#ifndef DEVICE
if (arr.pDevice != nullptr) {
pDevice = (T*)CudaTools::malloc(mShape.items() * sizeof(T));
}
#endif
};
/**
* The move-constructor for a Array.
*/
HD Array(Array&& arr)
: mShape(arr.mShape), pHost(arr.pHost), pDevice(arr.pDevice), mIsView(arr.mIsView),
mIsSlice(arr.mIsSlice) {
calcEnd();
// Make other object empty.
arr.pHost = nullptr;
arr.pDevice = nullptr;
arr.mIsView = true;
};
HD ~Array() { freeArrays(); };
/**
* The copy-assignment operator for a Array. If this is not a view,
* then the currently owned data will be freed, and a deep copy of the data will
* be performed on both host and device. On the device, it is always treated like a view.
*/
HD Array& operator=(const Array& arr) {
if (this == &arr) return *this;
if (mIsView) { // If this array is a view, we assign data from the right-hand side.
auto arr_it = arr.begin();
for (auto it = begin(); it != end() and arr_it != arr.end(); ++it) {
*it = *arr_it;
++arr_it;
}
return *this;
}
// Otherwise, it is implied to be object reassignment.
mShape = arr.mShape;
mIsView = arr.mIsView;
mIsSlice = arr.mIsSlice;
calcEnd();
// Regardless if the right-hand side is a view, we create a new copy.
// In case that the right-hand side is a view of this array, we
// allocate memory to copy first. Keep in mind that the right-hand side
// array will then become undefined.
// We can only do this on the host.
#ifndef DEVICE
T* new_pDevice = nullptr;
if (pDevice != nullptr) {
new_pDevice = (T*)CudaTools::malloc(mShape.items() * sizeof(T));
}
T* new_pHost = new T[mShape.items()];
memcpy(new_pHost, arr.pHost, mShape.items() * sizeof(T));
freeArrays();
pHost = new_pHost;
pDevice = new_pDevice;
#else
pHost = arr.pHost;
pDevice = arr.pDevice;
#endif
return *this;
};
/**
* The move-assignment operator for a Array.
*/
HD Array& operator=(Array&& arr) {
if (this == &arr) return *this;
if (mIsView) { // If this array is a view, we assign data from the right-hand side.
auto arr_it = arr.begin();
for (auto it = begin(); it != end() and arr_it != arr.end(); ++it) {
*it = *arr_it;
++arr_it;
}
return *this;
}
CT_ERROR(arr.mIsView,
"Cannot move-assign view to a non-view (owner). This would lead to undefined "
"behavior.");
// Otherwise, it is implied to be object reassignment.
freeArrays();
mShape = arr.mShape;
pHost = arr.pHost;
pDevice = arr.pDevice;
mIsView = arr.mIsView;
mIsSlice = arr.mIsSlice;
calcEnd();
// Make other array empty.
arr.pHost = nullptr;
arr.pDevice = nullptr;
arr.mIsView = true;
return *this;
};
/**
* Used for indexing the Array.
* \param index index of the first dimension
*/
HD Array operator[](const uint32_t index) const {
CT_ERROR_IF(index, >=, shape().dim(0), "Index exceeds axis size");
return Array(*this, shape().subshape(1), index * shape().stride(0));
};
/**
* Used for indexing the Array.
* \param indices a list of indices to index the Array
*/
HD Array operator[](const std::initializer_list<uint32_t> indices) const {
CT_ERROR_IF(indices.size(), >, shape().axes(),
"Number of indices cannot exceed number of axes");
auto it = indices.begin();
uint offset = 0;
for (uint32_t i = 0; i < indices.size(); ++i) {
uint32_t index = *it;
CT_ERROR_IF(index, >=, shape().dim(i), "Index exceeds axis size");
offset += index * shape().stride(i);
++it;
}
return Array(*this, shape().subshape(indices.size()), offset);
};
HD ArrayLoader<T> operator<<(const T value) {
auto it = begin();
*it = value;
++it;
return ArrayLoader<T>(it, end());
};
HD T operator=(const T& value) { return POINTER[0] = value; };
HD operator T&() { return POINTER[0]; };
HD operator const T&() const { return POINTER[0]; };
/**
* Used to create slices of the Array.
* \param slices a list of slices to slice the Array
*/
HD Array slice(const std::initializer_list<Slice> slices) const {
CT_ERROR_IF(slices.size(), >, shape().axes(),
"Number of slices cannot exceed number of axes");
uint offset = 0;
Shape new_shape = mShape;
auto it = slices.begin();
for (uint32_t i = 0; i < slices.size(); ++i) {
uint32_t from_index = it->first;
uint32_t to_index = it->second;
CT_ERROR_IF(from_index, >, to_index,
"Slice start cannot be greater than than slice end");
CT_ERROR_IF(from_index, >=, shape().dim(i), "Slice start exceeds axis size");
CT_ERROR_IF(to_index - 1, >=, shape().dim(i), "Slice end exceeds axis size");
offset += from_index * shape().stride(i);
new_shape.mAxisDim[i] = to_index - from_index;
++it;
}
new_shape.mItems = 1;
for (uint32_t i = 0; i < shape().axes(); ++i) {
new_shape.mItems *= new_shape.dim(i);
}
Array<T> arr(*this, new_shape, offset);
arr.mIsSlice = true;
return arr;
};
/**
* Returns this Array with a different Shape. Its self assigning version is reshape.
* If this Array is a slice of another, then it will perform a deep copy, and return
* a new non-view array.
*/
HD Array reshaped(const Shape& new_shape) const {
CT_ERROR_IF(shape().items(), !=, new_shape.items(),
"New shape cannot have a different number of terms");
CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
Array<T> arr = view();
arr.mShape = new_shape;
return arr;
};
HD void reshape(const Shape& new_shape) {
CT_ERROR_IF(shape().items(), !=, new_shape.items(),
"New shape cannot have a different number of terms");
CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
mShape = new_shape;
};
/**
* Gets a view that is has at least two dimensions. Useful for promoting
* single vectors to their 2D counterparts.
*/
HD Array atLeast2D() const {
return (shape().axes() == 1) ? reshaped({shape().length(), 1}) : view();
};
/**
* Reshapes this array, making it at least 2D. Useful for promoting
* single vectors to their 2D counterparts.
*/
HD void asAtLeast2D() {
if (shape().axes() == 1) reshape({shape().length(), 1});
};
/**
* Returns a view of this Array that has been flattened into one dimension.
*/
HD Array flattened() const { return reshaped({mShape.mItems}); };
/**
* Flattens this Array into one dimension.
*/
HD void flatten() { reshape({mShape.mItems}); };
#ifdef CUDATOOLS_USE_EIGEN
/**
* Returns the Eigen::Map of this Array.
*/
EigenAdaptConst<ComplexConversion<T>> eigenMap() const {
uint32_t total_dim = mShape.mAxes;
CT_ERROR(mIsSlice, "Mapping to an Eigen array cannot occur on slices")
CT_ERROR_IF(total_dim, !=, 2,
"Mapping to an Eigen array can only occur on two-dimensional arrays");
return EigenAdaptConst<ComplexConversion<T>>((ComplexConversion<T>*)POINTER, mShape.rows(),
mShape.cols());
};
#endif
/**
* Gets the Shape of the Array.
*/
HD Shape shape() const { return mShape; };
/**
* Gets the pointer to this array, depending on host or device.
*/
HD ComplexConversion<T>* data() const { return (ComplexConversion<T>*)POINTER; };
/**
* Returns the device pointer regardless of host or device.
*/
HD T* dataDevice() const { return pDevice; };
HD bool isView() const { return mIsView; }; /**< Gets whether this Array is a view. */
HD bool isSlice() const { return mIsSlice; }; /**< Gets whether this Array is a slice. */
/**
* Gets a view of this Array.
*/
HD Array view() const { return Array(*this, mShape); }
/**
* Copies this Array and returns a new Array with the same memory.
*/
Array copy() const {
Array<T> arr(mShape, (pDevice == nullptr));
auto arr_it = arr.begin();
for (auto it = begin(); it != end(); ++it) {
*arr_it = *it;
++arr_it;
}
#ifndef DEVICE
if (pDevice != nullptr) {
CudaTools::copy(pDevice, arr.dataDevice(), mShape.items() * sizeof(T)).wait();
}
#endif
return arr;
};
/**
* Gets the iterator to the beginning of this Array.
*/
HD ArrayIterator<T> begin() const { return ArrayIterator<T>(POINTER, mShape); };
/**
* Gets the iterator to the end of this Array.
*/
HD ArrayIterator<T> end() const { return ArrayIterator<T>(POINTER + mEndOffset, mShape); };
/**
* Sets the values of the entire Array to a constant. This is restricted to numerical types.
*/
HD void setConstant(const T value) const {
static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
for (auto it = begin(); it != end(); ++it) {
*it = value;
}
};
/**
* Sets the Array values with uniform random values in a specified range. This is restricted to
* numerical types.
* \brief Host only
*/
void setRandom(const T min, const T max) const {
static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
if constexpr (is_complex<T>) {
CT_ERROR_IF(max.real(), <, min.real(),
"Upper bound of range cannot be larger than lower bound");
CT_ERROR_IF(max.imag(), <, min.imag(),
"Upper bound of range cannot be larger than lower bound");
} else {
CT_ERROR_IF(max, <, min, "Upper bound of range cannot be larger than lower bound");
}
std::random_device rd;
std::mt19937 mt(rd());
if constexpr (is_int<T>) {
std::uniform_int_distribution<T> dist(min, max);
for (auto it = begin(); it != end(); ++it) {
*it = dist(mt);
}
} else if constexpr (is_float<T>) {
std::uniform_real_distribution<T> dist(min, max);
for (auto it = begin(); it != end(); ++it) {
*it = dist(mt);
}
} else if constexpr (is_complex<T>) {
std::uniform_real_distribution<ComplexUnderlying<T>> distr(min.real(), max.real());
std::uniform_real_distribution<ComplexUnderlying<T>> disti(min.imag(), max.imag());
for (auto it = begin(); it != end(); ++it) {
*it = T(distr(mt), disti(mt));
}
}
};
/**
* Sets the Array values to start from a value and increment by a specified step. This is
* restricted to numerical types.
*/
HD void setRange(T min, const T step = 1) const {
static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
for (auto it = begin(); it != end(); ++it) {
*it = min;
min += step;
}
}
/**
* Sets the Array values to be evenly spaced numbers over a given interval. This is restricted
* to floating point types.
*/
HD void setLinspace(const T min, const T max) const {
static_assert(is_float<T>, "Function only available on numeric floating types.");
CT_ERROR_IF(max, <, min, "Upper bound of range cannot be larger than lower bound");
T i = 0;
T d = max - min;
T items = (T)(shape().items() - 1);
for (auto it = begin(); it != end(); ++it) {
*it = min + d * (i / items);
i += 1;
}
};
/**
* Returns array of given shape with constant values. This is restricted to numerical types.
* \brief Host only
*/
static Array constant(const Shape& shape, const T value) {
static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
Array<T> arr(shape);
arr.setConstant(value);
return arr;
};
/**
* Returns array of given shape with random values in given interval. This is restricted to
* numerical types.
* \brief Host only
*/
static Array random(const Shape& shape, const T min, const T max) {
static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
Array<T> arr(shape);
arr.setRandom(min, max);
return arr;
};
/**
* Returns evenly spaced values within a given interval. This is restricted to numerical types.
* \brief Host only
*/
static Array range(const T min, const T max, const T step = 1) {
static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
CT_ERROR_IF(max, <, min, "Upper bound of range cannot be larger than lower bound");
Array<T> arr({(uint32_t)((max - min) / step)});
arr.setRange(min, step);
return arr;
}
/**
* Returns evenly spaced values within a given interval. This is restricted to floating point
* types.
* \brief Host only
*/
static Array linspace(const T min, const T max, const uint32_t size) {
static_assert(is_float<T>, "Function only available on numeric floating types.");
Array<T> arr({size});
arr.setLinspace(min, max);
return arr;
}
/**
* Transposes the internal data and returns the corresponding new Array.
* Its self assigning version is transpose. This is restricted to numerical types.
* \brief Host only
*/
#ifdef CUDATOOLS_USE_EIGEN
Array transposed() const {
static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
CT_ERROR_IF(shape().axes(), !=, 2, "Tranpose can only occur on two-dimensional arrays");
Array<T> new_arr({mShape.rows(), mShape.cols()});
new_arr.eigenMap() = this->eigenMap().transpose().eval();
return new_arr;
};
/**
* Transposes the intenal data. Its self assigning version is transpose.
* This is restricted to numerical types.
* \brief Host only
*/
void transpose() {
static_assert(is_host_num<T>, "Function only available on host-compatible numeric types.");
CT_ERROR_IF(shape().axes(), !=, 2, "Tranpose can only occur on two-dimensional arrays");
Array<T> new_arr(*this, {mShape.cols(), mShape.rows()});
new_arr.eigenMap() = this->eigenMap().transpose().eval();
mShape = Shape({mShape.cols(), mShape.rows()});
};
void inverse() const {
static_assert(is_float<T>, "Function only available on floating numeric types.");
CT_ERROR_IF(shape().axes(), !=, 2, "Inverse can only occur on two-dimensional arrays");
CT_ERROR_IF(shape().rows(), !=, shape().cols(),
"Inverse can only occur on square matrices");
Array<T> inv(shape());
inv.eigenMap() = this->eigenMap().inverse();
};
#endif
/**
* Pins the memory (page locks) for faster memory transfer in concurrent
* transfers.
* \brief Host only
*/
void pinMemory() const { CudaTools::pin(pHost, mShape.items() * sizeof(T)); };
/**
* Updates the host copy by copying the device data back to the host.
* \brief Host only
*/
StreamID updateHost(const StreamID& stream = DEF_MEM_STREAM) const {
CT_ERROR(mIsSlice, "Cannot update host copy on a slice");
return CudaTools::copy(pDevice, pHost, mShape.items() * sizeof(T), stream);
};
/**
* Updates the device copy by copying the host data to the device.
* \brief Host only
*/
StreamID updateDevice(const StreamID& stream = DEF_MEM_STREAM) const {
CT_ERROR(mIsSlice, "Cannot update device copy on a slice");
return CudaTools::copy(pHost, pDevice, mShape.items() * sizeof(T), stream);
};
#ifdef CUDATOOLS_USE_PYTHON
/**
* Returns a py::array for making an Array available as a Python numpy array.
*/
py::array pyArray() const {
std::vector<py::ssize_t> dims, strides;
for (uint iAxis = 0; iAxis < mShape.axes(); ++iAxis) {
dims.push_back(static_cast<py::ssize_t>(mShape.dim(iAxis)));
strides.push_back(sizeof(T) * static_cast<py::ssize_t>(mShape.stride(iAxis)));
}
return py::array_t<T, py::array::f_style>(
py::buffer_info((void*)pHost, sizeof(T), py::format_descriptor<T>::format(),
static_cast<py::ssize_t>(mShape.axes()), dims, strides));
};
#endif
};
template <typename T>
void printAxis(std::ostream& out, const Array<T>& arr, const uint32_t axis, size_t width) {
std::string space = std::string(2 * axis, ' ');
if (arr.shape().axes() == 1) {
out << "[";
for (uint32_t i = 0; i < arr.shape().items(); ++i) {
if constexpr (std::is_floating_point<T>::value) {
out << std::scientific << std::setprecision(6);
}
if (width == 0) {
out << ((i == 0) ? "" : " ");
} else {
out << std::setw((i == 0) ? width - 1 : width);
}
out << static_cast<T>(arr[i]) << ((i == arr.shape().items() - 1) ? "]" : ",");
}
} else if (arr.shape().axes() == 2) {
for (uint32_t i = 0; i < arr.shape().dim(0); ++i) {
out << space << ((i == 0) ? "[" : " ");
printAxis(out, arr[i], axis + 1, width);
out << ((i == arr.shape().dim(0) - 1) ? "]" : ",\n");
}
} else {
out << space << "[\n";
for (uint32_t i = 0; i < arr.shape().dim(0); ++i) {
printAxis(out, arr[i], axis + 1, width);
out << ((i == arr.shape().dim(0) - 1) ? "\n" : ",\n\n");
}
out << space << "]";
}
}
template <typename T> std::ostream& operator<<(std::ostream& out, const Array<T>& arr) {
size_t width = 0;
if constexpr (is_int<T>) {
T max_val = 0;
bool negative = false;
for (auto it = arr.begin(); it != arr.end(); ++it) {
T val = *it;
if constexpr (not std::is_unsigned<T>::value) {
if (*it < 0) {
negative = true;
val *= -1;
}
}
max_val = (val > max_val) ? val : max_val;
}
width = std::to_string(max_val).size() + 1;
width += (negative) ? 1 : 0;
} else if constexpr (is_float<T>) {
T max_val = 0;
bool negative = false;
for (auto it = arr.begin(); it != arr.end(); ++it) {
if (*it < 0) negative = true;
int exp = 0;
frexp(*it, &exp);
max_val = (exp > max_val) ? exp : max_val;
}
width = std::to_string(max_val).size() + 5;
width += (negative) ? 1 : 0;
}
printAxis<T>(out, arr, 0, (arr.shape().axes() == 1) ? 0 : width);
return out;
}
}; // namespace CudaTools
#endif // ARRAY_H