commit b4e4a49d44b7ea1671c022d29ce6d24d058c9f74
Author: Kenneth Jao <ksjao2@illinois.edu>
Date:   Tue Apr 18 03:23:12 2023 -0500

    Initial commit with first version of library

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..92cb202
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,10 @@
+---
+BasedOnStyle: LLVM
+IndentWidth: 4
+ColumnLimit: 100
+AllowShortIfStatementsOnASingleLine: true
+---
+Language: Cpp
+DerivePointerAlignment: false
+PointerAlignment: Left
+---
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b9062a2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+build
+*CPU
+*GPU
+.venv
diff --git a/Array.h b/Array.h
new file mode 100644
index 0000000..69b9a06
--- /dev/null
+++ b/Array.h
@@ -0,0 +1,777 @@
+#ifndef ARRAY_H
+#define ARRAY_H
+
+#include "Core.h"
+#include "Macros.h"
+#include <Eigen/Dense>
+#include <iomanip>
+#include <math.h>
+#include <random>
+#include <type_traits>
+
+#ifdef DEVICE
+#define POINTER pDevice
+#else
+#define POINTER pHost
+#endif
+
+namespace CudaTools {
+
+template <typename T>
+using EigenMat = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;
+template <typename T> using EigenMapMat = Eigen::Map<EigenMat<T>>;
+template <typename T> using ConstEigenMapMat = Eigen::Map<const EigenMat<T>>;
+
+template <typename T> struct EigenAdaptConst { typedef EigenMapMat<T> type; };
+template <typename T> struct EigenAdaptConst<const T> { typedef ConstEigenMapMat<T> type; };
+
+// Shorthands for type-trait constraints. Every predicate is wrapped in parentheses so that
+// it stays a single operand when used inside a larger expression (e.g. `not IS_NUM(T)`).
+#define ENABLE_IF(X) std::enable_if_t<X, bool>
+#define IS_INT(T) (std::is_integral<T>::value)
+#define IS_FLOAT(T) (std::is_floating_point<T>::value)
+#define IS_NUM(T) (IS_INT(T) or IS_FLOAT(T))
+
+template <typename T> class Array;
+using Slice = std::pair<uint32_t, uint32_t>;
+
+/**
+ * Iterator that walks the elements of an Array using the dimensions and strides of a Shape.
+ * The index along every axis is tracked; the last axis advances first and carries into the
+ * preceding axes when it wraps around.
+ */
+template <typename T> class ArrayIterator {
+  private:
+    template <typename U>
+    friend std::ostream& operator<<(std::ostream& out, const ArrayIterator<U>& it);
+    T* pData;                                          /**< Pointer to the current element. */
+    Shape mShape;                                      /**< Shape that is being traversed. */
+    uint32_t mIndices[CUDATOOLS_ARRAY_MAX_AXES] = {0}; /**< Current index along each axis. */
+
+  public:
+    /**
+     * Creates an iterator positioned at a given element.
+     * \param p pointer to the element the iterator starts at
+     * \param shape the shape whose dimensions and strides drive the traversal
+     */
+    HD ArrayIterator(T* p, const Shape& shape) : pData(p), mShape(shape){};
+
+    /**
+     * Moves the iterator to the next value. Stepping past the last element resets every
+     * index to zero and leaves the pointer one past the last element.
+     */
+    HD void next() {
+        bool carry = false;
+        uint32_t offset = 0;
+        // iAxis is unsigned: decrementing below zero wraps around, which ends the loop.
+        for (uint32_t iAxis = mShape.axes() - 1; iAxis < mShape.axes(); --iAxis) {
+            if (mIndices[iAxis] == mShape.dim(iAxis) - 1) {
+                // This axis is exhausted: rewind it and carry into the previous axis.
+                mIndices[iAxis] = 0;
+                offset += mShape.stride(iAxis) * (mShape.dim(iAxis) - 1);
+                carry = true;
+            } else {
+                pData += mShape.stride(iAxis);
+                mIndices[iAxis] += 1;
+                carry = false;
+            }
+
+            if (not carry) {
+                pData -= offset; // Undo the distance covered by the axes that were rewound.
+                return;
+            }
+        }
+        pData += 1; // "Overflow" occurred, so we reached end of array.
+    }
+
+    /**
+     * Moves the iterator to the previous value. Stepping before the first element sets every
+     * index to its maximum and leaves the pointer one before the first element.
+     */
+    HD void prev() {
+        bool carry = false;
+        uint32_t offset = 0;
+        for (uint32_t iAxis = mShape.axes() - 1; iAxis < mShape.axes(); --iAxis) {
+            if (mIndices[iAxis] == 0) {
+                // This axis is at its start: move it to its last index and borrow.
+                mIndices[iAxis] = mShape.dim(iAxis) - 1;
+                offset += mShape.stride(iAxis) * (mShape.dim(iAxis) - 1);
+                carry = true;
+            } else {
+                pData -= mShape.stride(iAxis);
+                mIndices[iAxis] -= 1; // Moving backwards, so the index must decrease.
+                carry = false;
+            }
+            if (not carry) {
+                pData += offset;
+                return;
+            }
+        }
+        pData -= 1;
+    }
+
+    /**
+     * Moves the iterator a specified value away.
+     * \param amount the amount to advance by; negative values move backwards
+     */
+    HD void advance(const int32_t amount) {
+        // Widen before negating so that the most negative value does not overflow.
+        const int64_t wide = amount;
+        const uint64_t steps = (uint64_t)((wide < 0) ? -wide : wide);
+        if (amount < 0) {
+            for (uint64_t i = 0; i < steps; ++i) {
+                prev();
+            }
+        } else {
+            for (uint64_t i = 0; i < steps; ++i) {
+                next();
+            }
+        }
+    }
+
+    HD void operator++() { next(); }; /**< Prefix increment operator. */
+    HD void operator--() { prev(); }; /**< Prefix decrement operator. */
+
+    /** Addition operator. Returns a copy of this iterator advanced by v. */
+    HD ArrayIterator<T> operator+(const int32_t v) const {
+        ArrayIterator<T> it = *this;
+        it.advance(v);
+        return it;
+    };
+
+    /** Subtraction operator. Returns a copy of this iterator moved back by v. */
+    HD ArrayIterator<T> operator-(const int32_t v) const {
+        ArrayIterator<T> it = *this;
+        it.advance(-v);
+        return it;
+    };
+    HD void operator+=(const int32_t v) { advance(v); };  /**< Advances in place by v. */
+    HD void operator-=(const int32_t v) { advance(-v); }; /**< Moves back in place by v. */
+
+    HD T& operator*() { return *pData; };             /**< Dereference operator. */
+    HD const T& operator*() const { return *pData; }; /**< Const dereference operator. */
+
+    /**
+     * Equals operator. Only the element pointers are compared.
+     */
+    HD bool operator==(const ArrayIterator<T>& it) const { return pData == it.pData; }
+
+    /**
+     * Not equals operator. Only the element pointers are compared.
+     */
+    HD bool operator!=(const ArrayIterator<T>& it) const { return pData != it.pData; }
+};
+
+/** Streams the pointer currently held by the iterator (not the element it points to). */
+template <typename T> std::ostream& operator<<(std::ostream& out, const ArrayIterator<T>& it) {
+    return out << it.pData;
+}
+
+/**
+ * Helper returned by Array::operator<< that fills an Array through the comma operator,
+ * writing one value per comma and advancing an iterator after each write.
+ */
+template <typename T> class ArrayLoader {
+  private:
+    ArrayIterator<T> mIterator;    /**< Position of the next element to be written. */
+    ArrayIterator<T> mIteratorEnd; /**< End position; writing here is reported as an error. */
+
+  public:
+    /**
+     * \param it iterator to the next element to write
+     * \param it_end iterator marking the end of the writable range
+     */
+    HD ArrayLoader(const ArrayIterator<T>& it, const ArrayIterator<T>& it_end)
+        : mIterator(it), mIteratorEnd(it_end){};
+    /**
+     * Writes value at the current position and moves to the next one.
+     * The check below runs before the write and compares the position against the end.
+     */
+    HD ArrayLoader &operator,(const T value) {
+        CT_ERROR_IF(mIterator, ==, mIteratorEnd, "Cannot assign more values than Array size");
+        *mIterator = value;
+        ++mIterator;
+        return *this;
+    }
+};
+
+/**
+ * A container that holds an N-dimensional array, stored column major. To set the
+ * maximum N, there is a compiler macro CUDATOOLS_ARRAY_MAX_AXES whose default value is 4.
+ * It adapts to operations between host and device to ease memory management.
+ */
+template <typename T> class Array {
+  private:
+    template <typename U> friend std::ostream& operator<<(std::ostream&, const Array<U>&);
+
+    Shape mShape;
+    T* pHost = nullptr;
+    T* pDevice = nullptr;
+
+    bool mIsView = false;
+    bool mIsSlice = false;
+
+    uint32_t mEndOffset = 0;
+
+    /**
+     * Releases the device and host buffers, but only when this Array is not a view.
+     * The whole body is compiled out when DEVICE is defined.
+     */
+    void freeArrays() {
+#ifndef DEVICE
+        if (not mIsView) {
+            if (pDevice != nullptr) CudaTools::free(pDevice);
+            if (pHost != nullptr) delete[] pHost;
+        }
+#endif
+    };
+
+    /**
+     * Recomputes mEndOffset: the offset of the last element (every axis at its highest
+     * index) plus one. end() adds this offset to the data pointer.
+     */
+    HD void calcEnd() {
+        Shape current = shape();
+        uint32_t lastElement = 0;
+        for (uint32_t iAxis = 0; iAxis < current.axes(); ++iAxis) {
+            lastElement += (current.dim(iAxis) - 1) * current.stride(iAxis);
+        }
+        mEndOffset = lastElement + 1;
+    };
+
+  public:
+    HD Array() = default;
+
+    /**
+     * Constructor for an Array that allocates a new array with the specified Shape.
+     * Construction in this format is disabled on the device.
+     * \brief Host only
+     * \param shape the shape of the array
+     * \param noDevice when true, no device buffer is allocated
+     */
+    Array(const Shape& shape, const bool noDevice = false) : mShape(shape), mIsView(false) {
+        pHost = new T[shape.items()];
+        calcEnd();
+        if (not noDevice) {
+            pDevice = (T*)CudaTools::malloc(shape.items() * sizeof(T));
+        }
+    };
+
+    /**
+     * Constructor for an Array from an existing (preallocated) pointer. The resulting Array
+     * is marked as a view, so it does not release the given pointer.
+     * NOTE(review): on the host a device buffer is allocated here unless noDevice is set, but
+     * freeArrays() skips views, so that buffer is not released by this class.
+     * \param pointer the pointer to use (stored as the host or device pointer, per POINTER)
+     * \param shape the shape of the array
+     * \param noDevice when true, no device buffer is allocated
+     */
+    HD Array(T* const pointer, const Shape& shape, const bool noDevice = false)
+        : mShape(shape), mIsView(true), mIsSlice(false) {
+        POINTER = pointer;
+        calcEnd();
+#ifndef DEVICE
+        if (noDevice) return;
+        pDevice = (T*)CudaTools::malloc(shape.items() * sizeof(T));
+#endif
+    };
+
+    /**
+     * Constructor for making an Array view from another Array, given an offset and shape.
+     * Null host or device pointers of the original stay null; non-null ones are shifted by
+     * the offset. The view inherits the slice flag of the original.
+     * \param arr the original Array
+     * \param shape the shape of the new array
+     * \param offset the element offset where the view starts
+     */
+    HD Array(const Array& arr, const Shape& shape, const uint32_t offset = 0)
+        : mShape(shape), pHost((arr.pHost != nullptr) ? arr.pHost + offset : nullptr),
+          pDevice((arr.pDevice != nullptr) ? arr.pDevice + offset : nullptr), mIsView(true),
+          mIsSlice(arr.mIsSlice) {
+        calcEnd();
+    };
+
+    /**
+     * The copy-constructor for an Array. If the source is a view, the new Array is a view
+     * of the same pointers. Otherwise a new host buffer is allocated and filled element by
+     * element from the source.
+     * NOTE(review): on the host a device buffer is allocated when the source has one, but
+     * its contents are not copied here (unlike copy()); confirm whether that is intended.
+     */
+    HD Array(const Array& arr) : mShape(arr.mShape), mIsView(arr.mIsView), mIsSlice(arr.mIsSlice) {
+        calcEnd();
+        if (mIsView) { // If the other array was a view (and now this one), just assign.
+            pHost = arr.pHost;
+            pDevice = arr.pDevice;
+            return;
+        }
+
+        // Otherwise, we assume this needs to own data.
+        pHost = new T[mShape.items()];
+        auto arr_it = arr.begin();
+        for (auto it = begin(); it != end(); ++it) {
+            *it = *arr_it;
+            ++arr_it;
+        }
+
+#ifndef DEVICE
+        if (arr.pDevice != nullptr) {
+            pDevice = (T*)CudaTools::malloc(mShape.items() * sizeof(T));
+        }
+#endif
+    };
+
+    /**
+     * The move-constructor for an Array. Takes over the pointers and flags of the source,
+     * then clears the source pointers and marks it as a view so that it frees nothing.
+     */
+    HD Array(Array&& arr)
+        : mShape(arr.mShape), pHost(arr.pHost), pDevice(arr.pDevice), mIsView(arr.mIsView),
+          mIsSlice(arr.mIsSlice) {
+        calcEnd();
+        // Make other object empty.
+        arr.pHost = nullptr;
+        arr.pDevice = nullptr;
+        arr.mIsView = true;
+    };
+
+    /** Destructor. Releases the buffers through freeArrays(), which skips views. */
+    HD ~Array() { freeArrays(); };
+
+    /**
+     * The copy-assignment operator for an Array.
+     * If this Array is a view, the elements of the right-hand side are written into the
+     * viewed memory, stopping at whichever end is reached first.
+     * Otherwise, on the host, the currently owned buffers are released and replaced by new
+     * buffers: the host data is copied from the right-hand side, and a device buffer is
+     * allocated (contents not copied) when this Array had one. This Array stays the owner of
+     * its buffers even when the right-hand side is a view. On the device, it is always
+     * treated like a view of the right-hand side.
+     */
+    HD Array& operator=(const Array& arr) {
+        if (this == &arr) return *this;
+
+        if (mIsView) { // If this array is a view, we assign data from the right-hand side.
+            auto arr_it = arr.begin();
+            for (auto it = begin(); it != end() and arr_it != arr.end(); ++it) {
+                *it = *arr_it;
+                ++arr_it;
+            }
+            return *this;
+        }
+
+        // Otherwise, it is implied to be object reassignment.
+#ifndef DEVICE
+        // Regardless if the right-hand side is a view, we create a new copy.
+        // In case that the right-hand side is a view of this array, the new buffers are
+        // allocated and filled before the old ones are released. Keep in mind that the
+        // right-hand side array will then become undefined.
+        const auto items = arr.mShape.items();
+
+        T* new_pDevice = nullptr;
+        if (pDevice != nullptr) {
+            new_pDevice = (T*)CudaTools::malloc(items * sizeof(T));
+        }
+
+        // NOTE(review): this copies a contiguous range; confirm the behavior expected when
+        // the right-hand side is a slice.
+        T* new_pHost = new T[items];
+        memcpy(new_pHost, arr.pHost, items * sizeof(T));
+
+        // mIsView is still false at this point, so the old buffers really are released.
+        freeArrays();
+        pHost = new_pHost;
+        pDevice = new_pDevice;
+
+        // mIsView is deliberately left false: the buffers above are owned by this Array and
+        // must be released by it, even if the right-hand side was a view.
+        mShape = arr.mShape;
+        mIsSlice = arr.mIsSlice;
+        calcEnd();
+#else
+        mShape = arr.mShape;
+        mIsView = arr.mIsView;
+        mIsSlice = arr.mIsSlice;
+        calcEnd();
+        pHost = arr.pHost;
+        pDevice = arr.pDevice;
+#endif
+        return *this;
+    };
+
+    /**
+     * The move-assignment operator for an Array.
+     * If this Array is a view, the elements of the right-hand side are written into the
+     * viewed memory, exactly as in copy-assignment. Otherwise this Array releases its
+     * buffers, takes over the pointers and flags of the right-hand side, and leaves the
+     * right-hand side with null pointers and marked as a view.
+     */
+    HD Array& operator=(Array&& arr) {
+        if (this == &arr) return *this;
+
+        if (mIsView) { // If this array is a view, we assign data from the right-hand side.
+            auto arr_it = arr.begin();
+            for (auto it = begin(); it != end() and arr_it != arr.end(); ++it) {
+                *it = *arr_it;
+                ++arr_it;
+            }
+            return *this;
+        }
+
+        // NOTE(review): presumably CT_ERROR reports when its first argument is true; confirm.
+        CT_ERROR(arr.mIsView,
+                 "Cannot move-assign view to a non-view (owner). This would lead to undefined "
+                 "behavior.");
+
+        // Otherwise, it is implied to be object reassignment.
+        freeArrays();
+        mShape = arr.mShape;
+        pHost = arr.pHost;
+        pDevice = arr.pDevice;
+        mIsView = arr.mIsView;
+        mIsSlice = arr.mIsSlice;
+        calcEnd();
+
+        // Make other array empty.
+        arr.pHost = nullptr;
+        arr.pDevice = nullptr;
+        arr.mIsView = true;
+        return *this;
+    };
+
+    /**
+     * Used for indexing the Array. Returns a view over the remaining axes at the given
+     * position of the first axis.
+     * \param index index of the first dimension
+     */
+    HD Array operator[](const uint32_t index) const {
+        CT_ERROR_IF(index, >=, shape().dim(0), "Index exceeds axis size");
+        const uint32_t start = index * shape().stride(0);
+        return Array(*this, shape().subshape(1), start);
+    };
+
+    /**
+     * Used for indexing the Array. The i-th entry of the list indexes the i-th axis; the
+     * result is a view over the axes that were not indexed.
+     * \param indices a list of indices to index the Array
+     */
+    HD Array operator[](const std::initializer_list<uint32_t> indices) const {
+        CT_ERROR_IF(indices.size(), >, shape().axes(),
+                    "Number of indices cannot exceed number of axes");
+        uint32_t offset = 0; // Fixed-width type; `uint` is not a standard C++ type.
+        uint32_t iAxis = 0;
+        for (uint32_t index : indices) {
+            CT_ERROR_IF(index, >=, shape().dim(iAxis), "Index exceeds axis size");
+            offset += index * shape().stride(iAxis);
+            ++iAxis;
+        }
+        return Array(*this, shape().subshape(indices.size()), offset);
+    };
+
+    /**
+     * Starts filling the Array: writes value into the first element and returns an
+     * ArrayLoader positioned at the second element, so that further values can be
+     * appended with the comma operator.
+     */
+    HD ArrayLoader<T> operator<<(const T value) {
+        auto it = begin();
+        *it = value;
+        ++it;
+        return ArrayLoader<T>(it, end());
+    };
+
+    /** Assigns value to the first element of the data and returns the assigned value. */
+    HD T operator=(const T& value) { return POINTER[0] = value; };
+    /** Conversion to a reference to the first element of the data. */
+    HD operator T&() { return POINTER[0]; };
+    /** Conversion to a const reference to the first element of the data. */
+    HD operator const T&() const { return POINTER[0]; };
+
+    /**
+     * Used to create slices of the Array. The i-th Slice applies to the i-th axis and holds
+     * (start, end); the resulting axis has end - start entries. Axes without a Slice keep
+     * their size. The result is a view flagged as a slice.
+     * \param slices a list of slices to slice the Array
+     */
+    HD Array slice(const std::initializer_list<Slice> slices) const {
+        CT_ERROR_IF(slices.size(), >, shape().axes(),
+                    "Number of slices cannot exceed number of axes");
+
+        uint32_t offset = 0; // Fixed-width type; `uint` is not a standard C++ type.
+        Shape new_shape = mShape;
+        auto it = slices.begin();
+        for (uint32_t i = 0; i < slices.size(); ++i) {
+            uint32_t from_index = it->first;
+            uint32_t to_index = it->second;
+            CT_ERROR_IF(from_index, >, to_index,
+                        "Slice start cannot be greater than than slice end");
+            CT_ERROR_IF(from_index, >=, shape().dim(i), "Slice start exceeds axis size");
+            CT_ERROR_IF(to_index - 1, >=, shape().dim(i), "Slice end exceeds axis size");
+
+            offset += from_index * shape().stride(i);
+            new_shape.mAxisDim[i] = to_index - from_index;
+            ++it;
+        }
+        // The axis sizes changed, so the total number of items has to be recomputed.
+        new_shape.mItems = 1;
+        for (uint32_t i = 0; i < shape().axes(); ++i) {
+            new_shape.mItems *= new_shape.dim(i);
+        }
+
+        Array<T> arr(*this, new_shape, offset);
+        arr.mIsSlice = true;
+        return arr;
+    };
+
+    /**
+     * Returns this Array with a different Shape. Its self assigning version is reshape.
+     * If this Array is a slice of another, then it will perform a deep copy, and return
+     * a new non-view array that owns the copied data. Otherwise a view of this Array with
+     * the new Shape is returned.
+     * \param new_shape the shape of the result; it must hold the same number of items
+     */
+    HD Array reshaped(const Shape& new_shape) const {
+        CT_ERROR_IF(shape().items(), !=, new_shape.items(),
+                    "New shape cannot have a different number of terms");
+        if (mIsSlice) {
+            // Reshape the copy itself and hand over ownership. Returning a view of the
+            // local copy would leave the caller with a pointer to released memory.
+            Array<T> arr = this->copy();
+            arr.mShape = new_shape;
+            return arr;
+        }
+        Array<T> arr = view();
+        arr.mShape = new_shape;
+        return arr;
+    };
+
+    /**
+     * Changes the Shape of this Array in place. The new Shape must hold the same number of
+     * items, and the Array must not be a slice (see reshaped for that case).
+     * \param new_shape the shape to assign to this Array
+     */
+    HD void reshape(const Shape& new_shape) {
+        CT_ERROR_IF(shape().items(), !=, new_shape.items(),
+                    "New shape cannot have a different number of terms");
+        CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try reshaped instead)")
+        mShape = new_shape;
+    };
+
+    /**
+     * Gets a view that has at least two dimensions. Useful for promoting
+     * single vectors to their 2D counterparts: a one-axis Array is viewed with a second
+     * axis of size 1, any other Array is returned as a plain view.
+     */
+    HD Array atLeast2D() const {
+        return (shape().axes() == 1) ? Array(*this, {shape().length(), 1}) : view();
+    };
+
+    /**
+     * Flattens the Array into one dimension. This goes through reshaped, so the result is a
+     * view of this Array, or a new Array when this Array is a slice.
+     */
+    HD Array flatten() const { return reshaped({mShape.mItems}); };
+
+    /**
+     * Returns the Eigen::Map of this Array, built over the current data pointer with the
+     * rows and columns of the Shape. The Array must have exactly two axes and must not be
+     * a slice. For a const element type the map is a map to const.
+     */
+    typename EigenAdaptConst<T>::type eigenMap() const {
+        uint32_t total_dim = mShape.mAxes;
+        CT_ERROR(mIsSlice, "Mapping to an Eigen array cannot occur on slices")
+        CT_ERROR_IF(total_dim, !=, 2,
+                    "Mapping to an Eigen array can only occur on two-dimensional arrays");
+        return typename EigenAdaptConst<T>::type(POINTER, mShape.rows(), mShape.cols());
+    };
+
+    /**
+     * Gets the Shape of the Array. The Shape is returned by value.
+     */
+    HD Shape shape() const { return mShape; };
+
+    /**
+     * Gets the pointer to this array, depending on host or device: the device pointer when
+     * DEVICE is defined, the host pointer otherwise.
+     */
+    HD T* data() const { return POINTER; };
+
+    /**
+     * Returns the device pointer regardless of host or device.
+     */
+    HD T* dataDevice() const { return pDevice; };
+
+    HD bool isView() const { return mIsView; };   /**< Gets whether this Array is a view. */
+    HD bool isSlice() const { return mIsSlice; }; /**< Gets whether this Array is a slice. */
+
+    /**
+     * Gets a view of this Array: same pointers and Shape, but the result does not own
+     * the data.
+     */
+    HD Array view() const { return Array(*this, mShape); }
+
+    /**
+     * Copies this Array and returns a new Array with the same Shape and its own memory.
+     * The elements are copied one by one through the iterators. On the host, if this Array
+     * has a device buffer, the device data is copied as well and the call waits for that
+     * copy to finish.
+     */
+    HD Array copy() const {
+        // The new Array gets a device buffer only if this one has one.
+        Array<T> arr(mShape, (pDevice == nullptr));
+
+        auto arr_it = arr.begin();
+        for (auto it = begin(); it != end(); ++it) {
+            *arr_it = *it;
+            ++arr_it;
+        }
+#ifndef DEVICE
+        if (pDevice != nullptr) {
+            CudaTools::deviceCopy(pDevice, arr.dataDevice(), mShape.items() * sizeof(T)).wait();
+        }
+#endif
+        return arr;
+    };
+
+    /**
+     * Gets the iterator to the beginning of this Array (the current data pointer).
+     */
+    HD ArrayIterator<T> begin() const { return ArrayIterator<T>(POINTER, mShape); };
+
+    /**
+     * Gets the iterator to the end of this Array: the data pointer advanced by the end
+     * offset computed in calcEnd(), i.e. one past the last element.
+     */
+    HD ArrayIterator<T> end() const { return ArrayIterator<T>(POINTER + mEndOffset, mShape); };
+
+    /**
+     * Writes the same value into every element of the Array. This is restricted to
+     * numerical types.
+     * \param value the value to store in every element
+     */
+    HD void setConstant(const T value) const {
+        static_assert(IS_NUM(T), "Function only available on numeric types.");
+        auto cursor = begin();
+        while (cursor != end()) {
+            *cursor = value;
+            ++cursor;
+        }
+    };
+
+    /**
+     * Sets the Array values with uniform random values in a specified range. This is restricted to
+     * numerical types. Integral types use std::uniform_int_distribution and floating point
+     * types use std::uniform_real_distribution, seeded from std::random_device.
+     * \brief Host only
+     * \param min lower bound of the range
+     * \param max upper bound of the range; must not be smaller than min
+     */
+    void setRandom(const T min, const T max) const {
+        static_assert(IS_NUM(T), "Function only available on numeric types.");
+        // The check fires when max < min, so the message must talk about "smaller".
+        CT_ERROR_IF(max, <, min, "Upper bound of range cannot be smaller than lower bound");
+        std::random_device rd;
+        std::mt19937 mt(rd());
+        if constexpr (IS_INT(T)) {
+            std::uniform_int_distribution<T> dist(min, max);
+            for (auto it = begin(); it != end(); ++it) {
+                *it = dist(mt);
+            }
+        } else if constexpr (IS_FLOAT(T)) {
+            std::uniform_real_distribution<T> dist(min, max);
+            for (auto it = begin(); it != end(); ++it) {
+                *it = dist(mt);
+            }
+        }
+    };
+
+    /**
+     * Fills the Array with an arithmetic progression: the first element receives min and
+     * each following element is larger by step. This is restricted to numerical types.
+     * \param min the value of the first element
+     * \param step the increment between consecutive elements
+     */
+    HD void setRange(T min, const T step = 1) const {
+        static_assert(IS_NUM(T), "Function only available on numeric types.");
+        auto cursor = begin();
+        while (cursor != end()) {
+            *cursor = min;
+            min += step;
+            ++cursor;
+        }
+    }
+    /**
+     * Sets the Array values to be evenly spaced numbers over a given interval. This is restricted
+     * to floating point types. The first element is min and, when there is more than one
+     * element, the last element is max. A single-element Array is set to min.
+     * \param min the first value of the interval
+     * \param max the last value of the interval; must not be smaller than min
+     */
+    HD void setLinspace(const T min, const T max) const {
+        static_assert(IS_FLOAT(T), "Function only available on numeric floating types.");
+        // The check fires when max < min, so the message must talk about "smaller".
+        CT_ERROR_IF(max, <, min, "Upper bound of range cannot be smaller than lower bound");
+        const T d = max - min;
+        // With one element there are no intervals; dividing by (items - 1) would be 0 / 0.
+        const T intervals = (shape().items() > 1) ? (T)(shape().items() - 1) : (T)1;
+        T i = 0;
+        for (auto it = begin(); it != end(); ++it) {
+            *it = min + d * (i / intervals);
+            i += 1;
+        }
+    };
+
+    /**
+     * Returns array of given shape with constant values. This is restricted to numerical types.
+     * \brief Host only
+     * \param shape the shape of the new Array
+     * \param value the value stored in every element
+     */
+    static Array constant(const Shape& shape, const T value) {
+        static_assert(IS_NUM(T), "Function only available on numeric types.");
+        Array<T> arr(shape);
+        arr.setConstant(value);
+        return arr;
+    };
+
+    /**
+     * Returns array of given shape with random values in given interval. This is restricted to
+     * numerical types. The values are produced by setRandom.
+     * \brief Host only
+     * \param shape the shape of the new Array
+     * \param min lower bound of the interval
+     * \param max upper bound of the interval
+     */
+    static Array random(const Shape& shape, const T min, const T max) {
+        static_assert(IS_NUM(T), "Function only available on numeric types.");
+        Array<T> arr(shape);
+        arr.setRandom(min, max);
+        return arr;
+    };
+
+    /**
+     * Returns evenly spaced values within a given interval. This is restricted to numerical types.
+     * The values are min, min + step, ... for every value strictly below max, so the number
+     * of elements is (max - min) / step rounded up.
+     * \brief Host only
+     * \param min the first value
+     * \param max the exclusive upper bound; must not be smaller than min
+     * \param step the spacing between values; must be positive
+     */
+    static Array range(const T min, const T max, const T step = 1) {
+        static_assert(IS_NUM(T), "Function only available on numeric types.");
+        // The check fires when max < min, so the message must talk about "smaller".
+        CT_ERROR_IF(max, <, min, "Upper bound of range cannot be smaller than lower bound");
+        CT_ERROR_IF(step, <=, (T)0, "Step of range must be positive");
+
+        // Round up, so that a last value below max is not dropped when the span is not an
+        // exact multiple of step.
+        uint32_t count = 0;
+        if constexpr (IS_INT(T)) {
+            const T span = max - min;
+            count = (uint32_t)(span / step) + ((span % step != 0) ? 1 : 0);
+        } else {
+            count = (uint32_t)ceil((max - min) / step);
+        }
+
+        Array<T> arr({count});
+        arr.setRange(min, step);
+        return arr;
+    }
+
+    /**
+     * Returns evenly spaced values within a given interval. This is restricted to floating point
+     * types. The result has one axis with the requested number of elements and is filled
+     * by setLinspace.
+     * \brief Host only
+     * \param min the first value of the interval
+     * \param max the last value of the interval
+     * \param size the number of elements
+     */
+    static Array linspace(const T min, const T max, const uint32_t size) {
+        static_assert(IS_FLOAT(T), "Function only available on numeric floating types.");
+        Array<T> arr({size});
+        arr.setLinspace(min, max);
+        return arr;
+    }
+
+    /**
+     * Transposes the internal data and returns the corresponding new Array.
+     * Its self assigning version is transpose. This is restricted to numerical types and
+     * to Arrays with exactly two axes.
+     * \brief Host only
+     */
+    Array transposed() const {
+        static_assert(IS_NUM(T), "Function only available on numeric types.");
+        CT_ERROR_IF(shape().axes(), !=, 2, "Tranpose can only occur on two-dimensional arrays");
+        // The result has the row and column counts swapped, as in transpose(). The mapped
+        // destination cannot be resized, so it must already have the transposed size.
+        Array<T> new_arr({mShape.cols(), mShape.rows()});
+        new_arr.eigenMap() = this->eigenMap().transpose().eval();
+        return new_arr;
+    };
+
+    /**
+     * Transposes the internal data in place and swaps the row and column counts of the
+     * Shape. The version that returns a new Array is transposed.
+     * This is restricted to numerical types.
+     * \brief Host only
+     */
+    void transpose() {
+        static_assert(IS_NUM(T), "Function only available on numeric types.");
+        CT_ERROR_IF(shape().axes(), !=, 2, "Tranpose can only occur on two-dimensional arrays");
+        // View of the same memory with the transposed shape; eval() materializes the
+        // transpose before it is written back over the source.
+        Array<T> new_arr(*this, {mShape.cols(), mShape.rows()});
+        new_arr.eigenMap() = this->eigenMap().transpose().eval();
+        mShape = Shape({mShape.cols(), mShape.rows()});
+    };
+
+    /**
+     * Replaces the data of this Array with its matrix inverse. This is restricted to
+     * floating point types and to square Arrays with exactly two axes.
+     * \brief Host only
+     */
+    void inverse() const {
+        static_assert(IS_FLOAT(T), "Function only available on floating numeric types.");
+        CT_ERROR_IF(shape().axes(), !=, 2, "Inverse can only occur on two-dimensional arrays");
+        CT_ERROR_IF(shape().rows(), !=, shape().cols(),
+                    "Inverse can only occur on square matrices");
+        // Write the result back into this Array instead of a local that is thrown away.
+        // eval() materializes the inverse first, because source and destination alias.
+        this->eigenMap() = this->eigenMap().inverse().eval();
+    };
+
+    /**
+     * Pins the memory (page locks) for faster memory transfer in concurrent
+     * transfers. The whole host buffer (items times element size) is passed to
+     * CudaTools::pin.
+     * \brief Host only
+     */
+    void pinMemory() const { CudaTools::pin(pHost, mShape.items() * sizeof(T)); };
+
+    /**
+     * Updates the host copy by copying the device data back to the host.
+     * Not available on views.
+     * \brief Host only
+     * \param stream the stream handed to CudaTools::pull
+     * \return the stream that was passed in
+     */
+    StreamID updateHost(const StreamID& stream = DEF_MEM_STREAM) const {
+        CT_ERROR(mIsView, "Cannot update host on a view");
+        CudaTools::pull(pHost, pDevice, mShape.items() * sizeof(T), stream);
+        return stream;
+    };
+
+    /**
+     * Updates the device copy by copying the host data to the device.
+     * Not available on views.
+     * \brief Host only
+     * \param stream the stream handed to CudaTools::push
+     * \return the stream that was passed in
+     */
+    StreamID updateDevice(const StreamID& stream = DEF_MEM_STREAM) const {
+        CT_ERROR(mIsView, "Cannot update device on a view");
+        CudaTools::push(pHost, pDevice, mShape.items() * sizeof(T), stream);
+        return stream;
+    };
+};
+
+/**
+ * Recursively prints an Array to a stream, one bracketed row per line.
+ * \param out the stream to print to
+ * \param arr the Array (or sub-view) to print
+ * \param axis the recursion depth, used to indent by two spaces per level
+ * \param width the field width for each value; 0 separates values by a single space
+ */
+template <typename T>
+void printAxis(std::ostream& out, const Array<T>& arr, const uint32_t axis, size_t width) {
+    std::string space = std::string(2 * axis, ' ');
+    if (arr.shape().axes() == 1) {
+        // One axis left: print the values themselves as "[a, b, c]".
+        out << "[";
+        for (uint32_t i = 0; i < arr.shape().items(); ++i) {
+            if constexpr (std::is_floating_point<T>::value) {
+                out << std::scientific << std::setprecision(6);
+            }
+            if (width == 0) {
+                out << ((i == 0) ? "" : " ");
+            } else {
+                // The first value follows the opening bracket, so it gets one column less.
+                out << std::setw((i == 0) ? width - 1 : width);
+            }
+            out << (T)arr[i] << ((i == arr.shape().items() - 1) ? "]" : ",");
+        }
+    } else if (arr.shape().axes() == 2) {
+        // Two axes: one row per line, wrapped in an outer pair of brackets.
+        for (uint32_t i = 0; i < arr.shape().dim(0); ++i) {
+            out << space << ((i == 0) ? "[" : " ");
+            printAxis(out, arr[i], axis + 1, width);
+            out << ((i == arr.shape().dim(0) - 1) ? "]" : ",\n");
+        }
+    } else {
+        // More axes: print each sub-array in its own bracketed group, separated by a blank line.
+        out << space << "[\n";
+        for (uint32_t i = 0; i < arr.shape().dim(0); ++i) {
+            printAxis(out, arr[i], axis + 1, width);
+            out << ((i == arr.shape().dim(0) - 1) ? "\n" : ",\n\n");
+        }
+        out << space << "]";
+    }
+}
+
+/**
+ * Prints an Array to a stream. For numeric types a common field width is derived from the
+ * contents so that the columns line up; Arrays with a single axis are printed without
+ * padding.
+ */
+template <typename T> std::ostream& operator<<(std::ostream& out, const Array<T>& arr) {
+    size_t width = 0;
+    // Integral types are tested first. Testing IS_NUM here would also match floating
+    // point types and make the floating point branch below unreachable.
+    if constexpr (IS_INT(T)) {
+        T max_val = 0;
+        bool negative = false;
+        for (auto it = arr.begin(); it != arr.end(); ++it) {
+            if (*it < 0) negative = true;
+            // Magnitude without abs(), which has no unambiguous overload for unsigned types.
+            const T magnitude = (*it < 0) ? (T)(-*it) : (T)*it;
+            max_val = (magnitude > max_val) ? magnitude : max_val;
+        }
+        // Digits of the largest magnitude, plus one column of separation.
+        width = std::to_string(max_val).size() + 1;
+        width += (negative) ? 1 : 0;
+    } else if constexpr (IS_FLOAT(T)) {
+        T max_val = 0;
+        bool negative = false;
+        for (auto it = arr.begin(); it != arr.end(); ++it) {
+            if (*it < 0) negative = true;
+            int exp = 0;
+            frexp(*it, &exp);
+            max_val = (exp > max_val) ? exp : max_val;
+        }
+        // The width is derived from the printed form of the largest binary exponent.
+        width = std::to_string(max_val).size() + 5;
+        width += (negative) ? 1 : 0;
+    }
+
+    printAxis<T>(out, arr, 0, (arr.shape().axes() == 1) ? 0 : width);
+    return out;
+}
+
+}; // namespace CudaTools
+
+#endif // ARRAY_H
diff --git a/BLAS.h b/BLAS.h
new file mode 100644
index 0000000..b79f4e5
--- /dev/null
+++ b/BLAS.h
@@ -0,0 +1,600 @@
+#ifndef BLAS_H
+#define BLAS_H
+
+#include "Array.h"
+#include "Core.h"
+#include "Macros.h"
+
+namespace CudaTools {
+
+namespace BLAS {
+
+/**
+ * Describes a strided batch operation: the element offset between consecutive matrices of each
+ * operand and the number of matrices in the batch.
+ */
+struct BatchInfo {
+    uint32_t strideA; /**< Elements between consecutive A matrices. */
+    uint32_t strideB; /**< Elements between consecutive B matrices. */
+    uint32_t strideC; /**< Elements between consecutive C matrices. */
+    uint32_t size;    /**< Number of matrices in the batch. */
+};
+
+/**
+ * Static shape-validation helpers used by the BLAS wrappers. Every failed condition is reported
+ * through the CT_ERROR_IF macro.
+ */
+template <typename T> struct Check {
+    /**
+     * Requires arr to have at least two axes.
+     * \param arr the array to validate.
+     * \param name the label used for arr in the error message.
+     */
+    static void isAtLeast2D(const Array<T>& arr, const std::string& name = "Array") {
+        CT_ERROR_IF(arr.shape().axes(), <, 2, (name + " needs to be at least 2D").c_str());
+    }
+
+    /**
+     * Requires arr to be at least 2D with equal row and column counts.
+     * \param arr the array to validate.
+     * \param name the label used for arr in the error message.
+     */
+    static void isSquare(const Array<T>& arr, const std::string& name = "Array") {
+        isAtLeast2D(arr, name);
+        CT_ERROR_IF(arr.shape().rows(), !=, arr.shape().cols(), (name + " is not square").c_str());
+    }
+
+    /**
+     * Requires that A * B is a valid matrix product and that its shape matches C. Only the
+     * rows()/cols() of each shape are compared here.
+     * \param A left operand.
+     * \param B right operand.
+     * \param C output.
+     * \param nameA label used for A in error messages.
+     * \param nameB label used for B in error messages.
+     * \param nameC label used for C in error messages.
+     */
+    static void isValidMatmul(const Array<T>& A, const Array<T>& B, const Array<T>& C,
+                              const std::string& nameA = "A", const std::string& nameB = "B",
+                              const std::string& nameC = "C") {
+        isAtLeast2D(A, nameA);
+        isAtLeast2D(B, nameB);
+        isAtLeast2D(C, nameC); // Report C under its own label, not B's.
+        CT_ERROR_IF(A.shape().cols(), !=, B.shape().rows(),
+                    (nameA + nameB + " is not a valid matrix multiplication").c_str());
+
+        Shape ABshape({A.shape().rows(), B.shape().cols()});
+        Shape Cshape({C.shape().rows(), C.shape().cols()});
+
+        CT_ERROR_IF(
+            ABshape, !=, Cshape,
+            ("The shape of " + nameA + nameB + " does not match the shape of " + nameC).c_str());
+    }
+
+    /**
+     * Returns the product of every axis dimension except the last two, i.e. the number of
+     * matrices stacked in arr. Returns 1 for arrays with two or fewer axes.
+     */
+    static uint32_t getUpperItems(const Array<T>& arr) {
+        const uint32_t axes = arr.shape().axes();
+        uint32_t upperItems = 1;
+        // 'iAxis + 2 < axes' instead of 'iAxis < axes - 2': the latter wraps around for
+        // arrays with fewer than two axes.
+        for (uint32_t iAxis = 0; iAxis + 2 < axes; ++iAxis) {
+            upperItems *= arr.shape().dim(iAxis);
+        }
+        return upperItems;
+    }
+
+    /**
+     * Requires A and B to have the same number of axes and identical dimensions on every axis
+     * except the last two.
+     * \param A first array.
+     * \param B second array.
+     * \param nameA label used for A in error messages.
+     * \param nameB label used for B in error messages.
+     */
+    static void matchUpperShape(const Array<T>& A, const Array<T>& B,
+                                const std::string& nameA = "A", const std::string& nameB = "B") {
+        CT_ERROR_IF(A.shape().axes(), !=, B.shape().axes(),
+                    (nameA + " and " + nameB + " shapes do not match for broadcasting").c_str());
+        const uint32_t axes = A.shape().axes();
+        for (uint32_t iAxis = 0; iAxis + 2 < axes; ++iAxis) {
+            uint32_t Adim = A.shape().dim(iAxis);
+            uint32_t Bdim = B.shape().dim(iAxis);
+            CT_ERROR_IF(
+                Adim, !=, Bdim,
+                (nameA + " and " + nameB + " shapes do not match for broadcasting").c_str());
+        }
+    }
+
+    /**
+     * Validates a (possibly batched) product C = A * B and computes its batch layout.
+     *
+     * Either A and B hold the same number of matrices, or exactly one of them holds a single
+     * matrix that is reused for every batch entry (its stride is then 0). C must hold as many
+     * matrices as the larger operand.
+     * \return the element strides of A, B and C (rows * cols, or 0 for a reused operand) and
+     * the number of matrices in C.
+     */
+    static BatchInfo isBroadcastable(const Array<T>& A, const Array<T>& B, const Array<T>& C,
+                                     const std::string& nameA = "A", const std::string& nameB = "B",
+                                     const std::string& nameC = "C") {
+        isValidMatmul(A, B, C, nameA, nameB, nameC);
+        uint32_t itemsA = getUpperItems(A);
+        uint32_t itemsB = getUpperItems(B);
+        uint32_t itemsC = getUpperItems(C);
+
+        uint32_t Asize = A.shape().rows() * A.shape().cols();
+        uint32_t Bsize = B.shape().rows() * B.shape().cols();
+        uint32_t Csize = C.shape().rows() * C.shape().cols();
+
+        if (itemsA == itemsB) {
+            CT_ERROR_IF(itemsA, !=, itemsC,
+                        ("Incorrect dimensions to broadcast to output " + nameC).c_str());
+            matchUpperShape(A, B, nameA, nameB);
+            matchUpperShape(A, C, nameA, nameC);
+            return BatchInfo{Asize, Bsize, Csize, itemsC};
+        } else if (itemsA > itemsB) {
+            CT_ERROR_IF(
+                itemsB, !=, 1,
+                ("Cannot broadcast operation to " + nameB + " with non-matching " + nameA).c_str());
+            CT_ERROR_IF(itemsA, !=, itemsC,
+                        ("Incorrect dimensions to broadcast to output " + nameC).c_str());
+            matchUpperShape(A, C, nameA, nameC);
+            return BatchInfo{Asize, 0, Csize, itemsC};
+        } else {
+            CT_ERROR_IF(
+                itemsA, !=, 1,
+                ("Cannot broadcast operation to " + nameA + " with non-matching " + nameB).c_str());
+            // B is the batched operand here, so C must hold as many matrices as B. Comparing
+            // against itemsA (which is 1) rejected every valid batched output.
+            CT_ERROR_IF(itemsB, !=, itemsC,
+                        ("Incorrect dimensions to broadcast to output " + nameC).c_str());
+            matchUpperShape(B, C, nameB, nameC);
+            return BatchInfo{0, Bsize, Csize, itemsC};
+        }
+    }
+};
+
+/**
+ * Represents a Batch of Arrays with the same shape. Mainly used for cuBLAS functions.
+ *
+ * The batch stores one data pointer per matrix (device pointers when CUDA is defined, host
+ * pointers otherwise) in an Array of pointers.
+ */
+template <typename T> class Batch {
+  protected:
+    Array<T*> mBatch; /**< One data pointer per matrix in the batch. */
+    Shape mShape;     /**< Shape shared by the matrices in the batch. */
+
+    uint32_t mCount = 0; /**< Number of matrices stored so far. */
+    uint32_t mBatchSize; /**< Total number of matrices the batch holds. */
+
+  public:
+    Batch() = delete;
+
+    /**
+     * Constructs an empty batch with room for the given number of matrices, to be filled
+     * with add(). The pointer array is allocated here so that add() has storage to write to.
+     */
+    Batch(const uint32_t size) : mBatch({size}), mBatchSize(size) {}
+
+    /**
+     * Constructs a full batch from a non-view Array that is at least 2D. Every matrix spanned
+     * by the last two axes becomes one batch entry.
+     */
+    Batch(const Array<T>& arr) {
+        CT_ERROR(arr.isView(), "Array cannot be a view");
+        // rows()/cols() and the upper-axis count are only meaningful with at least two axes.
+        Check<T>::isAtLeast2D(arr, "Array");
+        mShape = Shape({arr.shape().rows(), arr.shape().cols()});
+        mBatchSize = mCount = Check<T>::getUpperItems(arr);
+
+        mBatch = Array<T*>({mBatchSize});
+
+        Array<T> batch = arr.reshaped({mBatchSize, mShape.rows(), mShape.cols()});
+        for (uint32_t i = 0; i < mBatchSize; ++i) {
+#ifdef CUDA
+            mBatch[i] = batch[i].dataDevice();
+#else
+            mBatch[i] = batch[i].data();
+#endif
+        }
+
+        mBatch.updateDevice().wait();
+    }
+
+    /**
+     * Adds a matrix to the batch. Array must be a view. The first matrix added fixes the shape
+     * that all later matrices must match. Once the batch is full, the pointer array is pushed
+     * to the device.
+     */
+    void add(const Array<T>& arr) {
+        CT_ERROR(not arr.isView(), "Cannot add non-view Arrays");
+        CT_ERROR_IF(mCount, ==, mBatchSize, "Batch is full, cannot add more arrays");
+#ifdef CUDA
+        mBatch[mCount] = arr.dataDevice();
+#else
+        mBatch[mCount] = arr.data();
+#endif
+        if (mCount == 0) {
+            // Only the shape is recorded here. The batch size was fixed by the constructor and
+            // the count is advanced below; resetting either here would corrupt the bookkeeping.
+            mShape = arr.shape();
+        } else {
+            CT_ERROR_IF(arr.shape(), !=, mShape, "Cannot add matrix of different shape to batch");
+        }
+        ++mCount;
+
+        if (mCount == mBatchSize) {
+            mBatch.updateDevice().wait();
+        }
+    }
+
+    /**
+     * Indexing operator which returns a view of the Array in the Batch at the given index.
+     */
+    Array<T> operator[](const uint32_t index) const {
+        CT_ERROR_IF(index, >=, mBatchSize, "Index exceeds batch size");
+        return Array<T>(mBatch[index], {mShape.rows(), mShape.cols()});
+    }
+
+    /**
+     * Returns the batch Array of pointers.
+     */
+    Array<T*> batch() const { return mBatch.view(); }
+    Shape shape() const { return mShape; } /**< Gets the shape of the matrices in the batch. */
+    uint32_t size() const { return mBatchSize; }       /**< Gets the batch size.*/
+    bool full() const { return mBatchSize == mCount; } /**< Gets if the batch is full. */
+};
+
+////////////////
+// cuBLAS API //
+////////////////
+
+/**
+ * Dispatches to the single- or double-precision variant of a cuBLAS routine based on T.
+ * \param f1 the routine called when T is float.
+ * \param f2 the routine called when T is double.
+ * \param args the arguments passed unchanged to the selected routine.
+ *
+ * The selected call is wrapped in CUBLAS_CHECK. Any other T reports an error through CT_ERROR.
+ */
+template <typename T, typename F1, typename F2, typename... Args>
+constexpr void invoke(F1 f1, F2 f2, Args&&... args) {
+    constexpr bool isSingle = std::is_same<T, float>::value;
+    constexpr bool isDouble = std::is_same<T, double>::value;
+
+    if constexpr (isSingle) {
+        CUBLAS_CHECK(f1(args...));
+    } else if constexpr (isDouble) {
+        CUBLAS_CHECK(f2(args...));
+    } else {
+        CT_ERROR(true, "BLAS functions are not callable with that type");
+    }
+}
+
+/**
+ * Computes the matrix-vector product: \f$ y = \alpha Ax + \beta y \f$. It will automatically
+ * broadcast the operation if applicable.
+ * \param alpha scalar applied to the product Ax.
+ * \param A the matrix (or stack of matrices).
+ * \param x the column vector (or stack of column vectors).
+ * \param beta scalar applied to the existing contents of y.
+ * \param y the output column vector(s), updated in place.
+ * \param stream the stream the cuBLAS call is issued on (unused without CUDA).
+ * \return the stream that was passed in.
+ */
+template <typename T>
+StreamID GEMV(const T alpha, const Array<T>& A, const Array<T>& x, const T beta, const Array<T>& y,
+              const StreamID& stream = DEF_CUBLAS_STREAM) {
+
+    BatchInfo bi = Check<T>::isBroadcastable(A, x, y, "A", "x", "y");
+    CT_ERROR_IF(x.shape().cols(), !=, 1, "x must be a column vector");
+    CT_ERROR_IF(y.shape().cols(), !=, 1, "y must be a column vector");
+
+    uint32_t rows = A.shape().rows();
+    uint32_t cols = A.shape().cols();
+    // Local copies so that their addresses can be handed to cuBLAS.
+    T a = alpha, b = beta;
+#ifdef CUDA
+    // Pass the StreamID itself; going through stream.id would build a temporary StreamID.
+    CUBLAS_CHECK(cublasSetStream(Manager::get()->cublasHandle(), Manager::get()->stream(stream)));
+    if (bi.size == 1) {
+        invoke<T>(cublasSgemv, cublasDgemv, Manager::get()->cublasHandle(), CUBLAS_OP_N, rows, cols,
+                  &a, A.dataDevice(), rows, x.dataDevice(), 1, &b, y.dataDevice(), 1);
+
+    } else { // More than one matrix, so use the strided batched routine.
+        invoke<T>(cublasSgemvStridedBatched, cublasDgemvStridedBatched,
+                  Manager::get()->cublasHandle(), CUBLAS_OP_N, rows, cols, &a, A.dataDevice(), rows,
+                  bi.strideA, x.dataDevice(), 1, bi.strideB, &b, y.dataDevice(), 1, bi.strideC,
+                  bi.size);
+    }
+
+#else
+    if (bi.size == 1) {
+        y.eigenMap() = a * (A.eigenMap() * x.eigenMap()) + b * y.eigenMap();
+    } else { // More than one matrix, so loop over the batch.
+#pragma omp parallel for
+        for (uint32_t i = 0; i < bi.size; ++i) {
+            // A stride of 0 makes every iteration reuse the same (broadcast) operand.
+            auto Ai = Array<T>(A, {rows, cols}, i * bi.strideA).eigenMap();
+            auto xi = Array<T>(x, {cols, 1}, i * bi.strideB).eigenMap();
+            auto yi = Array<T>(y, {rows, 1}, i * bi.strideC).eigenMap();
+            yi = a * (Ai * xi) + b * yi;
+        }
+    }
+#endif
+    return stream;
+}
+
+/**
+ * Computes the matrix-matrix product: \f$ C = \alpha AB + \beta C \f$. It will automatically
+ * broadcast the operation if applicable.
+ * \param alpha scalar applied to the product AB.
+ * \param A the left matrix (or stack of matrices), m x k.
+ * \param B the right matrix (or stack of matrices), k x n.
+ * \param beta scalar applied to the existing contents of C.
+ * \param C the output matrix (or stack of matrices), m x n, updated in place.
+ * \param stream the stream the cuBLAS call is issued on (unused without CUDA).
+ * \return the stream that was passed in.
+ */
+template <typename T>
+StreamID GEMM(const T alpha, const Array<T>& A, const Array<T>& B, const T beta, const Array<T>& C,
+              const StreamID& stream = DEF_CUBLAS_STREAM) {
+
+    BatchInfo bi = Check<T>::isBroadcastable(A, B, C, "A", "B", "C");
+    // A is m x k, B is k x n.
+    uint32_t m = A.shape().rows();
+    uint32_t k = A.shape().cols();
+    uint32_t n = B.shape().cols();
+
+    // Local copies so that their addresses can be handed to cuBLAS.
+    T a = alpha, b = beta;
+#ifdef CUDA
+    // Pass the StreamID itself; going through stream.id would build a temporary StreamID.
+    CUBLAS_CHECK(cublasSetStream(Manager::get()->cublasHandle(), Manager::get()->stream(stream)));
+    if (bi.size == 1) {
+        invoke<T>(cublasSgemm, cublasDgemm, Manager::get()->cublasHandle(), CUBLAS_OP_N,
+                  CUBLAS_OP_N, m, n, k, &a, A.dataDevice(), m, B.dataDevice(), k, &b,
+                  C.dataDevice(), m);
+
+    } else { // More than one matrix, so use the strided batched routine.
+        invoke<T>(cublasSgemmStridedBatched, cublasDgemmStridedBatched,
+                  Manager::get()->cublasHandle(), CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &a,
+                  A.dataDevice(), m, bi.strideA, B.dataDevice(), k, bi.strideB, &b, C.dataDevice(),
+                  m, bi.strideC, bi.size);
+    }
+
+#else
+    if (bi.size == 1) {
+        C.eigenMap() = a * (A.eigenMap() * B.eigenMap()) + b * C.eigenMap();
+    } else { // More than one matrix, so loop over the batch.
+#pragma omp parallel for
+        for (uint32_t i = 0; i < bi.size; ++i) {
+            // A stride of 0 makes every iteration reuse the same (broadcast) operand.
+            auto Ai = Array<T>(A, {m, k}, i * bi.strideA).eigenMap();
+            auto Bi = Array<T>(B, {k, n}, i * bi.strideB).eigenMap();
+            auto Ci = Array<T>(C, {m, n}, i * bi.strideC).eigenMap();
+            Ci = a * (Ai * Bi) + b * Ci;
+        }
+    }
+#endif
+    return stream;
+}
+
+/**
+ * Computes the diagonal matrix multiplication: \f$ C = A\mathrm{diag}(X) \f$, or \f$ C =
+ * \mathrm{diag}(X)A \f$ if left = true.
+ * \param A the input matrix.
+ * \param X a column vector holding the diagonal entries; its length must match the columns of
+ * A, or the rows of A when left = true.
+ * \param C the output matrix, with the same rows and columns as A.
+ * \param left selects diag(X) * A instead of A * diag(X).
+ * \param stream the stream the cuBLAS call is issued on (unused without CUDA).
+ * \return the stream that was passed in.
+ */
+template <typename T>
+StreamID DGMM(const Array<T>& A, const Array<T>& X, const Array<T>& C, const bool left = false,
+              const StreamID& stream = DEF_CUBLAS_STREAM) {
+    CT_ERROR_IF(X.shape().cols(), !=, 1, "'x' must be a column vector.");
+    if (left) {
+        CT_ERROR_IF(A.shape().rows(), !=, X.shape().rows(),
+                    "Rows of 'A' and length of 'x' need to match.");
+    } else {
+        CT_ERROR_IF(A.shape().cols(), !=, X.shape().rows(),
+                    "Columns of 'A' and length of 'x' need to match.");
+    }
+    CT_ERROR_IF(A.shape().rows(), !=, C.shape().rows(),
+                "Rows of 'A' and rows() of 'C' need to  match.");
+    CT_ERROR_IF(A.shape().cols(), !=, C.shape().cols(),
+                "Columns of 'A' and columns of 'C' need to match.");
+
+#ifdef CUDA
+    uint32_t m = C.shape().rows();
+    uint32_t n = C.shape().cols();
+    auto mode = (left) ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT;
+    // Pass the StreamID itself; going through stream.id would build a temporary StreamID.
+    CUBLAS_CHECK(cublasSetStream(Manager::get()->cublasHandle(), Manager::get()->stream(stream)));
+    // cublas<t>dgmm takes the side mode directly after the handle; it was previously computed
+    // but never passed.
+    invoke<T>(cublasSdgmm, cublasDdgmm, Manager::get()->cublasHandle(), mode, m, n,
+              A.dataDevice(), A.shape().rows(), X.dataDevice(), 1, C.dataDevice(), m);
+#else
+    if (left) {
+        C.eigenMap() = X.eigenMap().asDiagonal() * A.eigenMap();
+    } else {
+        C.eigenMap() = A.eigenMap() * X.eigenMap().asDiagonal();
+    }
+#endif
+    return stream;
+}
+
+//////////////////////////////
+// PLUArray Related Objects //
+//////////////////////////////
+
+///////////////////////////
+// PartialPivLU Wrapper  //
+///////////////////////////
+
+// PartialPivLU (defined below) is just a workaround to use Eigen's internals directly.
+template <typename T> class PartialPivLU;
+namespace internal {
+// A 1x1 placeholder Array per element type, and an Eigen map over its storage. They are used
+// to default-construct a PartialPivLU before it is pointed at real data by make().
+// NOTE(review): empty_map is initialized from empty; this relies on empty<T> being constructed
+// first - confirm the initialization order holds for these variable templates.
+template <typename T> static Array<T> empty({1, 1});
+template <typename T> static EigenMapMat<T> empty_map = empty<T>.eigenMap();
+}; // namespace internal
+
+// Forward declaration of PLUArray (restricted to floating point T), needed for the friend
+// declaration inside PartialPivLU.
+template <typename T, ENABLE_IF(IS_FLOAT(T)) = true> class PLUArray;
+// This is a wrapper class for Eigen's class so we have more controlled access to
+// the underlying data. It derives from Eigen's PartialPivLU over an Eigen::Ref so that the
+// base class members can be re-pointed at externally owned storage.
+template <typename T> class PartialPivLU : public Eigen::PartialPivLU<Eigen::Ref<EigenMat<T>>> {
+  private:
+    using Base = Eigen::PartialPivLU<Eigen::Ref<EigenMat<T>>>;
+    // PLUArray drives compute() and reads/writes the maps below.
+    template <typename U, ENABLE_IF(IS_FLOAT(U))> friend class PLUArray;
+
+    EigenMapMat<T> mMapLU;           // Map over the LU matrix storage.
+    EigenMapMat<int32_t> mMapPivots; // Map over the pivot storage.
+
+  public:
+    /**
+     * Constructs a placeholder decomposition whose maps refer to the shared 1x1 placeholder
+     * storage in internal::empty_map. Call make() before using it.
+     */
+    PartialPivLU()
+        : Base(internal::empty_map<T>), mMapLU(internal::empty_map<T>),
+          mMapPivots(internal::empty_map<int32_t>){};
+
+    /**
+     * Points this decomposition at the given storage.
+     * \param lu the Array whose data backs the LU matrix.
+     * \param pivots the Array whose data backs the pivots (mapped through atLeast2D()).
+     */
+    void make(const Array<T>& lu, const Array<int32_t>& pivots) {
+
+        // Placement new is used to re-seat the maps: assigning to an Eigen::Map copies
+        // coefficients instead of changing what the map refers to.
+        new (&mMapLU) EigenMapMat<T>(lu.eigenMap());
+        new (&mMapPivots) EigenMapMat<int32_t>(pivots.atLeast2D().eigenMap());
+
+        // Rebuild the base class members in place from the freshly seated maps.
+        // NOTE(review): the previous objects are overwritten without running their
+        // destructors - confirm this is benign for these Eigen types.
+        new (&this->m_lu) decltype(Base::m_lu)(mMapLU.derived());
+        new (&this->m_p) decltype(Base::m_p)(mMapPivots.derived());
+
+        // new (&this->m_rowsTranspositions) decltype(Base::m_rowsTranspositions)(
+        //     mMapPivots.derived());
+
+        // Reset the remaining base class state and mark the object as initialized.
+        this->m_l1_norm = 0;
+        this->m_det_p = 0;
+        this->m_isInitialized = true;
+    };
+};
+
+namespace internal {
+// A default-constructed PartialPivLU per element type. We only create one and copy-construct
+// from it (see PLUArray::mPPLU) to avoid the re-initialization.
+template <typename T> static PartialPivLU<T> BlankPPLU = PartialPivLU<T>();
+}; // namespace internal
+
+/**
+ * Class for storing the PLU decomposition of an Array. This is restricted to floating point
+ * types.
+ */
+template <typename T, ENABLE_IF(IS_FLOAT(T))> class PLUArray {
+  private:
+    Array<T> mLU;           // The matrix, overwritten by its LU factors in computeLU().
+    Array<int32_t> mPivots; // One pivot entry per row.
+    PartialPivLU<T> mPPLU = internal::BlankPPLU<T>; // Eigen wrapper mapped onto the two above.
+
+  public:
+    PLUArray() = delete;
+
+    /**
+     * Constructor for a PLUArray given the matrix dimension.
+     */
+    PLUArray(const uint32_t n) : mLU({n, n}), mPivots({n}) { mPPLU.make(mLU, mPivots); }
+
+    /**
+     * Constructor for a PLUArray given an existing array. A view is kept as a view; a non-view
+     * array is copy-constructed. The array must be a square 2D matrix.
+     */
+    PLUArray(const Array<T>& arr)
+        : mLU((arr.isView()) ? arr.view() : arr), mPivots({arr.shape().rows()}) {
+        CT_ERROR_IF(mLU.shape().axes(), !=, 2, "Array must be a 2D matrix");
+        CT_ERROR_IF(mLU.shape().rows(), !=, mLU.shape().cols(), "Matrix must be square");
+        mPPLU.make(mLU, mPivots);
+    }
+
+    /**
+     * Constructor for a PLUArray given an existing location in memory for both the matrix and
+     * the pivots. Both are stored as views.
+     *
+     * pivots is taken by reference so that the stored view refers to the caller's array rather
+     * than to a by-value parameter that is destroyed when the constructor returns.
+     */
+    PLUArray(const Array<T>& arr, const Array<int32_t>& pivots)
+        : mLU(arr.view()), mPivots(pivots.view()) {
+        CT_ERROR_IF(mLU.shape().axes(), !=, 2, "Array must be a 2D matrix");
+        CT_ERROR_IF(mLU.shape().rows(), !=, mLU.shape().cols(), "Matrix must be square");
+        mPPLU.make(mLU, mPivots);
+    }
+
+    uint32_t rank() const { return mLU.shape().rows(); } /**< Gets the number of rows of the LU
+                                                            matrix. */
+    Array<T> LU() const { return mLU.view(); }           /**< Gets a view of the LU matrix. */
+    Array<int32_t> pivots() const { return mPivots.view(); } /**< Gets a view of the pivots. */
+
+    /**
+     * Computes the inplace LU factorization for this array on CPU, then writes the indices of
+     * the resulting permutation into the pivots storage.
+     */
+    void computeLU() {
+        mPPLU.compute();
+        mPPLU.mMapPivots = mPPLU.permutationP().indices();
+    }
+
+    /**
+     * Solves the system \f$ LUx = b \f$ and returns \f$x\f$ as a new Array with the shape of
+     * \f$b\f$.
+     */
+    Array<T> solve(const Array<T>& b) {
+        Array<T> x(b.shape());
+        x.eigenMap() = mPPLU.solve(b.eigenMap());
+        return x;
+    }
+};
+
+/**
+ * This is a batch version of PLUArray, to enable usage of the cuBLAS API. This is restricted to
+ * floating point types.
+ */
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+class PLUBatch : public Batch<T> {
+  private:
+    Array<int32_t> mPivotsBatch; // Pivots of all matrices, rows() entries per matrix.
+    Array<int32_t> mInfoLU;      // One status entry per matrix from the batched factorization.
+    // Status written by the batched solve. Initialized to 0 because the CPU path never writes
+    // it and validSolve() would otherwise read an indeterminate value.
+    int32_t mInfoSolve = 0;
+
+    bool mInitialized = false; // Set once computeLU() has been called.
+
+  public:
+    /**
+     * Constructor of a PLUBatch from a given batch size.
+     * NOTE(review): the pivot storage is not allocated by this constructor - confirm how it is
+     * meant to be sized when matrices are supplied through add().
+     */
+    PLUBatch(const uint32_t size) : Batch<T>(size), mInfoLU({size}) {}
+
+    /**
+     * Constructor of a PLUBatch from a multi-dimensional array, batched across upper dimensions.
+     * The matrices must be square.
+     */
+    PLUBatch(const Array<T>& arr) : Batch<T>(arr) {
+        Check<T>::isSquare(arr, "LU Array");
+
+        mPivotsBatch = Array<int32_t>({this->mBatchSize * this->mShape.rows()});
+        mInfoLU = Array<int32_t>({this->mBatchSize});
+    }
+
+    /**
+     * Indexing operator which returns the PLUArray in the PLUBatch at the given index. The
+     * result refers to the batch's matrix and to the matching slice of the pivot storage.
+     */
+    PLUArray<T> operator[](const uint32_t index) const {
+        CT_ERROR_IF(index, >=, this->mBatchSize, "Index exceeds batch size");
+        Array<T> lu(this->mBatch[index], {this->mShape.rows(), this->mShape.cols()});
+        Array<int32_t> pivots(mPivotsBatch.data() + index * this->mShape.rows(),
+                              {this->mShape.rows()});
+        return PLUArray<T>(lu, pivots);
+    }
+
+    /**
+     * Computes the inplace PLU decomposition of batch of arrays.
+     * \param stream the stream the cuBLAS call is issued on (unused without CUDA).
+     * \return the stream that was passed in.
+     */
+    StreamID computeLU(const StreamID& stream = DEF_CUBLAS_STREAM) {
+#ifdef CUDA
+        uint32_t n = this->mShape.rows();
+        // Pass the StreamID itself; going through stream.id would build a temporary StreamID.
+        CUBLAS_CHECK(
+            cublasSetStream(Manager::get()->cublasHandle(), Manager::get()->stream(stream)));
+        invoke<T>(cublasSgetrfBatched, cublasDgetrfBatched, Manager::get()->cublasHandle(), n,
+                  this->mBatch.dataDevice(), n, mPivotsBatch.dataDevice(), mInfoLU.dataDevice(),
+                  this->mBatchSize);
+
+#else
+#pragma omp parallel for
+        for (uint32_t i = 0; i < this->mBatchSize; ++i) {
+            (*this)[i].computeLU();
+        }
+#endif
+        mInitialized = true;
+        return stream;
+    }
+
+    /**
+     * Solves the batched system \f$LUx = b\f$ inplace. The solution \f$x\f$ is written back into
+     * \f$b\f$. computeLU() must have been called first.
+     * \param b the right-hand sides; must have the same batch size and row count as this batch.
+     * \param stream the stream the cuBLAS call is issued on (unused without CUDA).
+     * \return the stream that was passed in.
+     */
+    StreamID solve(const Batch<T>& b, const StreamID& stream = DEF_CUBLAS_STREAM) {
+        CT_ERROR(not mInitialized,
+                 "Cannot solve system if PLUBatch has not yet computed its LU decomposition");
+        CT_ERROR_IF(b.size(), !=, this->mBatchSize,
+                    "Upper dimensions of b do not match batch size");
+        CT_ERROR_IF(b.shape().rows(), !=, this->mShape.rows(),
+                    "The length of each column of b must match the matrix rank");
+
+#ifdef CUDA
+        uint32_t n = b.shape().rows();
+        uint32_t nrhs = b.shape().cols();
+        CUBLAS_CHECK(
+            cublasSetStream(Manager::get()->cublasHandle(), Manager::get()->stream(stream)));
+        invoke<T>(cublasSgetrsBatched, cublasDgetrsBatched, Manager::get()->cublasHandle(),
+                  CUBLAS_OP_N, n, nrhs, this->mBatch.dataDevice(), n, mPivotsBatch.dataDevice(),
+                  b.batch().dataDevice(), n, &mInfoSolve, this->mBatchSize);
+
+#else
+#pragma omp parallel for
+        for (uint32_t i = 0; i < this->mBatchSize; ++i) {
+            // NOTE(review): b[i] is returned by value; this relies on assigning to that view
+            // writing through to the batch's storage - confirm against Array::operator=.
+            b[i] = (*this)[i].solve(b[i]);
+        }
+#endif
+        return stream;
+    }
+
+    /**
+     * Gets the pivots data from the device to the host. Does nothing for CPU.
+     */
+    StreamID getPivots(const StreamID& stream = DEF_MEM_STREAM) const {
+        mPivotsBatch.updateHost(stream);
+        return stream;
+    }
+
+    /**
+     * Gets the info array for the LU decomposition for the device to the host. Does not
+     * return useful information for CPU.
+     */
+    Array<int32_t> getLUInfo() const {
+        mInfoLU.updateHost().wait();
+        return mInfoLU;
+    }
+
+    /**
+     * Checks validity of the solve operation: non-zero when the last solve reported status 0.
+     * Does not return useful information for CPU.
+     */
+    int32_t validSolve() const { return mInfoSolve == 0; }
+};
+
+// /**
+//  * Gets the inverse of each A[i], using an already PLU factorized A[i].
+//  * Only available if compiling with CUDA.
+//  */
+// template <typename T>
+// void inverseBatch(const Array<T*>& batchA, const Array<T*>& batchC, const Array<int>&
+// pivots,
+//                   const Array<int>& info, const Shape shapeA, const Shape shapeC,
+//                   const uint stream = 0) {
+// #ifdef CUDA
+//     CT_ERROR_IF(shapeA.rows(), !=, shapeA.cols(),
+//           "'A' needs to be square, rows() and column need to match.");
+//     CT_ERROR_IF(shapeA.rows(), !=, shapeC.cols(), "'A' needs to be the same shape as
+//     'C'."); CT_ERROR_IF(shapeA.rows(), !=, shapeC.rows(), "'A' needs to be the same shape
+//     as 'C'.");
+
+//     CT_ERROR_IF(shapeA.rows(), !=, pivots.shape().rows(),
+//           "Rows()/columns of 'A' and rows() of pivots need to match.");
+//     CT_ERROR_IF(batchA.shape().rows(), !=, pivots.shape().cols(),
+//           "Batch size and columns of pivots need to match.");
+//     CT_ERROR_IF(info.shape().cols(), !=, 1, "Info needs to be a column vector.")
+//     CT_ERROR_IF(batchA.shape().rows(), !=, info.shape().rows(),
+//           "Batch size and length of info need to match.");
+//     CT_ERROR_IF(batchA.shape().rows(), !=, batchC.shape().rows(),
+//           "Batches 'A[i]' and 'C[i]' need to match.");
+
+//     std::string s = "cublas" + std::to_string(stream);
+//     CUBLAS_CHECK(
+//         cublasSetStream(Manager::get()->cublasHandle(),
+//         Manager::get()->stream(s)));
+//     invoke<T>(cublasSgetriBatched, cublasDgetriBatched,
+//     Manager::get()->cublasHandle(),
+//               shapeA.rows(), batchA.dataDevice(), shapeA.rows(), pivots.dataDevice(),
+//               batchC.dataDevice(), shapeC.rows(), info.dataDevice(),
+//               batchA.shape().rows());
+// #else
+//     CT_ERROR_IF(true, ==, true, "inverseBatch is not callable without CUDA.");
+// #endif
+// }
+
+}; // namespace BLAS
+}; // namespace CudaTools
+
+#endif
diff --git a/Core.h b/Core.h
new file mode 100644
index 0000000..2f878d8
--- /dev/null
+++ b/Core.h
@@ -0,0 +1,544 @@
+#ifndef CUDATOOLS_H
+#define CUDATOOLS_H
+
+#include "Macros.h"
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace CudaTools {
+
+/**
+ * Thin wrapper around the name of a stream. It lets host code pass 'streams'
+ * around by name and offers a short syntax for waiting on one.
+ */
+struct StreamID {
+  public:
+    std::string id; /**< The name of the stream; empty by default. */
+
+    StreamID() = default;
+
+    /**
+     * Constructs a StreamID from a stream name.
+     */
+    StreamID(const std::string& id_) : id(id_) {}
+    StreamID(const char* id_) : id(id_) {}
+
+    /**
+     * Blocks until the stream named by this StreamID has finished.
+     */
+    void wait() const;
+};
+
+// Names of the default streams. They are used as default arguments for the memory, cuBLAS and
+// kernel functions, and match the streams created for the global Manager when compiling with
+// CUDACC.
+static const StreamID DEF_MEM_STREAM = StreamID{"defaultMemory"};
+static const StreamID DEF_CUBLAS_STREAM = StreamID{"defaultCublas"};
+static const StreamID DEF_KERNEL_STREAM = StreamID{"defaultKernel"};
+
+/**
+ * Allocates memory on the device. Returns nullptr when not compiling with CUDACC.
+ * \param size the number of bytes to allocate.
+ */
+void* malloc(const size_t size);
+
+/**
+ * Pins memory on the host. Does nothing when not compiling with CUDACC.
+ * \param pHost the start of the host memory region.
+ * \param size the size of the region in bytes.
+ */
+void pin(void* const pHost, const size_t size);
+
+/**
+ * Pushes memory from the host to the device. The copy is asynchronous on the given stream.
+ * \return the stream that was passed in.
+ */
+StreamID push(void* const pHost, void* const pDevice, const size_t size,
+              const StreamID& stream = DEF_MEM_STREAM);
+/**
+ * Pulls memory from the device back to the host. The copy is asynchronous on the given stream.
+ * \return the stream that was passed in.
+ */
+StreamID pull(void* const pHost, void* const pDevice, const size_t size,
+              const StreamID& stream = DEF_MEM_STREAM);
+/**
+ * Copies memory on the device to another location on the device. Note the argument order:
+ * source first, then destination.
+ * \return the stream that was passed in.
+ */
+StreamID deviceCopy(void* const pSrc, void* const pDest, const size_t size,
+                    const StreamID& stream = DEF_MEM_STREAM);
+
+/**
+ * Frees memory on the device. A nullptr argument is ignored.
+ */
+void free(void* const pDevice);
+
+#ifdef CUDACC
+/** Selects device 0 and returns its properties. */
+cudaDeviceProp getDeviceProp();
+/** Properties of device 0, queried once per translation unit during static initialization. */
+static cudaDeviceProp DeviceProperties = getDeviceProp();
+/** Returns a description for a cuBLAS status code (defined elsewhere). */
+const char* cublasGetErrorString(cublasStatus_t status);
+#endif
+
+/**
+ * A class that manages various CUDA Runtime components, namely the
+ * named streams and the cuBLAS handle. There is a single global instance,
+ * reachable through Manager::get().
+ */
+class Manager {
+  private:
+    static Manager mManagerInstance; // The single global instance.
+    // Private: only the global instance is ever constructed. Creates one stream per name.
+    Manager(const std::vector<std::string>& names);
+    ~Manager();
+#ifdef CUDACC
+    std::unordered_map<std::string, cudaStream_t> mStreams; // Streams keyed by name.
+    cublasHandle_t mCublas;                                 // The cuBLAS handle.
+#endif
+  public:
+    /**
+     * Used to get the global CudaTools::Manager instance.
+     */
+    static Manager* get() { return &mManagerInstance; };
+
+    void waitFor(const StreamID& stream) const; /**< Waits for the stream provided. */
+    void sync() const;                          /**< Waits until all device code has finished. */
+    void addStream(const std::string& name);    /**< Creates a stream with the given name. */
+#ifdef CUDACC
+    cudaStream_t stream(const StreamID& stream) const; /**< Gets the stream with the given ID. */
+    cublasHandle_t cublasHandle() const;               /**< Gets the cuBLAS handle. */
+#endif
+};
+
+namespace Kernel {
+
+/**
+ * A struct that contains the kernel launch parameters. With CUDACC these are the grid and
+ * block dimensions and the shared memory size; otherwise only a thread count is kept.
+ */
+struct Settings {
+  public:
+#ifdef CUDACC
+    dim3 blockGrid;               // Grid dimensions, in blocks.
+    dim3 threadBlock;             // Block dimensions, in threads.
+    size_t sharedMemoryBytes = 0; // Passed as the shared memory launch parameter.
+#else
+    size_t threads; // Thread count used when not compiling with CUDACC.
+#endif
+    StreamID stream; // The stream the kernel is launched on.
+
+    Settings() = default;
+
+    void setGridDim(const size_t x);                 /**< Sets the Grid dimensions. */
+    void setGridDim(const size_t x, const size_t y); /**< Sets the Grid dimensions. */
+    void setGridDim(const size_t x, const size_t y,
+                    const size_t z);                  /**< Sets the Grid dimensions. */
+    void setBlockDim(const size_t x);                 /**< Sets the Thread Block dimensions. */
+    void setBlockDim(const size_t x, const size_t y); /**< Sets the Thread Block dimensions. */
+    void setBlockDim(const size_t x, const size_t y,
+                     const size_t z); /**< Sets the Thread Block dimensions. */
+
+    void setSharedMemSize(const size_t bytes); /**< Sets the shared memory size in bytes. */
+    void setStream(const StreamID& stream);    /**< Sets the stream. */
+};
+
+/**
+ * Returns kernel launch parameters based on the number of threads, and optionally
+ * a stream. Should only be used for 'embarrassingly parallel' situations, or where
+ * each thread corresponds to some sort of index.
+ */
+Settings basic(const size_t threads, const StreamID& stream = DEF_KERNEL_STREAM);
+
+}; // namespace Kernel
+
+template <typename T> class Array;
+
+/**
+ * A class that holds information about an Array: its number of axes, total item
+ * count, and the dimension and stride of each axis (up to CUDATOOLS_ARRAY_MAX_AXES axes).
+ */
+class Shape {
+  private:
+    template <typename T> friend class Array;
+    uint32_t mAxes;                                    // Number of axes.
+    uint32_t mItems;                                   // Total number of items.
+    uint32_t mAxisDim[CUDATOOLS_ARRAY_MAX_AXES] = {0}; // Dimension of each axis.
+    uint32_t mStride[CUDATOOLS_ARRAY_MAX_AXES] = {0};  // Stride of each axis.
+
+  public:
+    /** Constructs an empty shape with zero axes and an item count of one. */
+    HD Shape() : mAxes(0), mItems(1){};
+    /**
+     * The constructor for a Shape.
+     * \param dims an initializer list of the dimensions.
+     */
+    HD Shape(const std::initializer_list<uint32_t> dims);
+
+    HD uint32_t axes() const;  /**< Gets the number of axes. */
+    HD uint32_t items() const; /**< Gets the total number of items. */
+
+    HD uint32_t length() const; /**< For 1D shapes, gets the length. In general, gets the dimension
+                                   of the last axis. */
+    HD uint32_t rows() const;   /**< For 2D shapes, gets the number of rows. In general, gets the
+                                   dimension of the second to last axis. */
+    HD uint32_t cols() const;   /**< For 2D shapes, gets the number of columns. In general,
+                                   presumably gets the dimension of the last axis (the previous
+                                   comment repeated the rows() text; confirm against the
+                                   implementation). */
+
+    HD uint32_t
+    dim(const uint32_t axis) const; /**< Gets the dimension size of the specified axis. */
+    HD uint32_t stride(const uint32_t axis) const; /**< Gets the stride of the specified axis. */
+
+    /**
+     * Gets the shape at a specific axis of this shape.
+     * \param axis the axis of where the new shape starts.
+     */
+    HD Shape subshape(const uint32_t axis) const;
+
+    HD bool operator==(const Shape& s) const; /**< Equals operator. */
+    HD bool operator!=(const Shape& s) const; /**< Not equals operator. */
+};
+
+/** Streams a textual representation of a Shape. */
+std::ostream& operator<<(std::ostream& out, const Shape& s);
+
+}; // namespace CudaTools
+
+#ifdef CUDATOOLS_IMPLEMENTATION
+
+namespace CudaTools {
+
+/**
+ * Runs a kernel with the given launch settings. With CUDA the function is launched on the
+ * device using the grid, block, shared memory size and stream from the settings; otherwise it
+ * is called directly on the host.
+ * \param func the kernel function.
+ * \param sett the launch settings.
+ * \param args the arguments passed to the kernel.
+ * \return the stream stored in the settings.
+ */
+template <typename T, typename... Args>
+StreamID runKernel(T func, const Kernel::Settings& sett, Args... args) {
+#ifdef CUDA
+    const auto launchStream = Manager::get()->stream(sett.stream.id);
+    func<<<sett.blockGrid, sett.threadBlock, sett.sharedMemoryBytes, launchStream>>>(args...);
+#else
+    func(args...);
+#endif
+    return sett.stream;
+}
+
+////////////////////
+// Memory Methods //
+////////////////////
+
+/**
+ * Waits for the stream with this stream ID by delegating to the global Manager.
+ * The object itself is passed on: passing only its id would construct a temporary StreamID
+ * (and copy the name) on every call.
+ */
+void StreamID::wait() const { Manager::get()->waitFor(*this); }
+
+/**
+ * Allocates size bytes on the device and returns the device pointer. Without CUDACC nothing
+ * is allocated and nullptr is returned.
+ */
+void* malloc(const size_t size) {
+#ifndef CUDACC
+    return nullptr;
+#else
+    void* devicePtr;
+    CUDA_CHECK(cudaMalloc(&devicePtr, size));
+    return devicePtr;
+#endif
+}
+
+/**
+ * Releases device memory. A nullptr argument is ignored, and without CUDACC this is a no-op.
+ */
+void free(void* const pDevice) {
+#ifdef CUDACC
+    if (pDevice == nullptr) return;
+    CUDA_CHECK(cudaFree(pDevice));
+#endif
+}
+
+/**
+ * Asynchronously copies size bytes from host memory (pHost) to device memory (pDevice) on the
+ * given stream. Without CUDACC no copy takes place.
+ * \return the stream that was passed in.
+ */
+StreamID push(void* const pHost, void* const pDevice, const size_t size, const StreamID& stream) {
+#ifdef CUDACC
+    // Pass the StreamID itself; going through stream.id would build a temporary StreamID.
+    CUDA_CHECK(cudaMemcpyAsync(pDevice, pHost, size, cudaMemcpyHostToDevice,
+                               Manager::get()->stream(stream)));
+#endif
+    return stream;
+}
+
+/**
+ * Asynchronously copies size bytes from device memory (pDevice) to host memory (pHost) on the
+ * given stream. Without CUDACC no copy takes place.
+ * \return the stream that was passed in.
+ */
+StreamID pull(void* const pHost, void* const pDevice, const size_t size, const StreamID& stream) {
+#ifdef CUDACC
+    // Pass the StreamID itself; going through stream.id would build a temporary StreamID.
+    CUDA_CHECK(cudaMemcpyAsync(pHost, pDevice, size, cudaMemcpyDeviceToHost,
+                               Manager::get()->stream(stream)));
+#endif
+    return stream;
+}
+
+/**
+ * Asynchronously copies size bytes from one device location (pSrc) to another (pDest) on the
+ * given stream. Without CUDACC no copy takes place.
+ * \return the stream that was passed in.
+ */
+StreamID deviceCopy(void* const pSrc, void* const pDest, const size_t size,
+                    const StreamID& stream) {
+#ifdef CUDACC
+    // Pass the StreamID itself; going through stream.id would build a temporary StreamID.
+    CUDA_CHECK(cudaMemcpyAsync(pDest, pSrc, size, cudaMemcpyDeviceToDevice,
+                               Manager::get()->stream(stream)));
+#endif
+    return stream;
+}
+
+/**
+ * Registers size bytes of host memory starting at pHost with cudaHostRegister, using the
+ * default flags. Without CUDACC this is a no-op.
+ */
+void pin(void* const pHost, const size_t size) {
+#ifdef CUDACC
+    CUDA_CHECK(cudaHostRegister(pHost, size, cudaHostRegisterDefault));
+#endif
+}
+
+#ifdef CUDACC
+/**
+ * Selects device 0 and returns its properties.
+ *
+ * The return codes of the two CUDA calls are not checked, so the struct is value-initialized
+ * first: if the query fails the caller receives zeroed properties rather than an
+ * indeterminate value.
+ */
+cudaDeviceProp getDeviceProp() {
+    cudaSetDevice(0);
+    cudaDeviceProp deviceProp{};
+    cudaGetDeviceProperties(&deviceProp, 0);
+    return deviceProp;
+}
+#endif
+
+/////////////////////
+// Manager Methods //
+/////////////////////
+
+/**
+ * Creates one stream per given name and the cuBLAS handle. Without CUDACC nothing is created.
+ * \param names the names of the streams to create.
+ */
+Manager::Manager(const std::vector<std::string>& names) {
+#ifdef CUDACC
+    // Iterate by const reference; iterating by value copied every name.
+    for (const auto& name : names) {
+        addStream(name);
+    }
+    CUBLAS_CHECK(cublasCreate(&mCublas));
+#endif
+}
+
+/**
+ * Destroys every stream owned by the manager, then the cuBLAS handle. Without CUDACC there is
+ * nothing to release.
+ */
+Manager::~Manager() {
+#ifdef CUDACC
+    for (auto entry = mStreams.begin(); entry != mStreams.end(); ++entry) {
+        CUDA_CHECK(cudaStreamDestroy(entry->second));
+    }
+    CUBLAS_CHECK(cublasDestroy(mCublas));
+#endif
+}
+
+/**
+ * Blocks until the named stream has finished. An unknown stream name is reported through
+ * CT_ERROR. Without CUDACC this is a no-op.
+ */
+void Manager::waitFor(const StreamID& stream) const {
+#ifdef CUDACC
+    const auto found = mStreams.find(stream.id);
+    if (found == mStreams.end()) {
+        CT_ERROR(true, ("Invalid stream " + stream.id).c_str());
+    } else {
+        CUDA_CHECK(cudaStreamSynchronize(found->second));
+    }
+#endif
+}
+
+/**
+ * Blocks until the device has finished all work, via cudaDeviceSynchronize. Without CUDACC
+ * this is a no-op.
+ */
+void Manager::sync() const {
+#ifdef CUDACC
+    CUDA_CHECK(cudaDeviceSynchronize());
+#endif
+}
+
+// Creates a new CUDA stream and stores it in mStreams under `name` (CUDA builds only).
+// NOTE(review): if `name` is already registered, the previous handle is overwritten without
+// being destroyed. Confirm callers never reuse a name.
+void Manager::addStream(const std::string& name) {
+#ifdef CUDACC
+    cudaStream_t created;
+    CUDA_CHECK(cudaStreamCreate(&created));
+    mStreams[name] = created;
+#endif
+}
+
+#ifdef CUDACC
+// Looks up the CUDA stream registered under `stream.id`.
+// Raises a CT_ERROR when the name is unknown.
+cudaStream_t Manager::stream(const StreamID& stream) const {
+    const auto it = mStreams.find(stream.id);
+    if (it == mStreams.end()) {
+        CT_ERROR(true, ("Invalid stream " + stream.id).c_str());
+        // The host CT_ERROR throws, so this line is not normally reached. It exists so that
+        // every path of this non-void function returns a value (a null stream handle).
+        return nullptr;
+    }
+    return it->second;
+}
+
+// Returns the cuBLAS handle created in the constructor.
+cublasHandle_t Manager::cublasHandle() const {
+    return mCublas;
+}
+
+// The single Manager instance. CUDA builds register three default streams by name.
+Manager Manager::mManagerInstance({"defaultMemory", "defaultCublas", "defaultKernel"});
+#else
+// Non-CUDA builds pass one empty name; the constructor creates nothing without CUDACC.
+Manager Manager::mManagerInstance({""});
+#endif
+
+////////////////////
+// Kernel Methods //
+////////////////////
+
+namespace Kernel {
+
+// Configures a one-dimensional grid of `x` blocks; the y and z extents are set to 1.
+// Raises a CT_ERROR_IF when `x` exceeds the device limit for the first grid dimension.
+// Without CUDACC this does nothing.
+void Settings::setGridDim(const size_t x) {
+#ifdef CUDACC
+    CT_ERROR_IF(x, >, DeviceProperties.maxGridSize[0], "Total grid size too large");
+    blockGrid.z = 1;
+    blockGrid.y = 1;
+    blockGrid.x = x;
+#endif
+}
+
+// Configures a two-dimensional grid of `x` by `y` blocks; the z extent is set to 1.
+// The product and each extent are validated against DeviceProperties.maxGridSize first.
+// Without CUDACC this does nothing.
+void Settings::setGridDim(const size_t x, const size_t y) {
+#ifdef CUDACC
+    // Validation order is kept: total first, then each dimension.
+    CT_ERROR_IF(x * y, >, DeviceProperties.maxGridSize[0], "Total grid size too large.");
+    CT_ERROR_IF(x, >, DeviceProperties.maxGridSize[0], "Grid dimension 'x' too large.");
+    CT_ERROR_IF(y, >, DeviceProperties.maxGridSize[1], "Grid dimension 'y' too large.");
+    blockGrid.z = 1;
+    blockGrid.y = y;
+    blockGrid.x = x;
+#endif
+}
+
+// Configures a three-dimensional grid of `x` by `y` by `z` blocks.
+// The product and each extent are validated against DeviceProperties.maxGridSize first.
+// Without CUDACC this does nothing.
+void Settings::setGridDim(const size_t x, const size_t y, const size_t z) {
+#ifdef CUDACC
+    // Validation order is kept: total first, then each dimension.
+    CT_ERROR_IF(x * y * z, >, DeviceProperties.maxGridSize[0], "Total grid size too large.");
+    CT_ERROR_IF(x, >, DeviceProperties.maxGridSize[0], "Grid dimension 'x' too large.");
+    CT_ERROR_IF(y, >, DeviceProperties.maxGridSize[1], "Grid dimension 'y' too large.");
+    CT_ERROR_IF(z, >, DeviceProperties.maxGridSize[2], "Grid dimension 'z' too large.");
+    blockGrid.z = z;
+    blockGrid.y = y;
+    blockGrid.x = x;
+#endif
+}
+
+// Configures a one-dimensional thread block of `x` threads; the y and z extents are set to 1.
+// The total thread count is validated against maxThreadsPerBlock (the per-block limit) and
+// the extent against maxThreadsDim[0] (the per-dimension limit).
+// Without CUDACC this does nothing.
+void Settings::setBlockDim(const size_t x) {
+#ifdef CUDACC
+    CT_ERROR_IF(x, >, DeviceProperties.maxThreadsPerBlock, "Total block size too large.");
+    CT_ERROR_IF(x, >, DeviceProperties.maxThreadsDim[0], "Block dimension 'x' too large.");
+    threadBlock.x = x;
+    threadBlock.y = 1;
+    threadBlock.z = 1;
+#endif
+}
+
+// Configures a two-dimensional thread block of `x` by `y` threads; the z extent is set to 1.
+// The total thread count is validated against maxThreadsPerBlock (the per-block limit), not
+// against the x-extent limit; each extent is validated against maxThreadsDim.
+// Without CUDACC this does nothing.
+void Settings::setBlockDim(const size_t x, const size_t y) {
+#ifdef CUDACC
+    CT_ERROR_IF(x * y, >, DeviceProperties.maxThreadsPerBlock, "Total block size too large.");
+    CT_ERROR_IF(x, >, DeviceProperties.maxThreadsDim[0], "Block dimension 'x' too large.");
+    CT_ERROR_IF(y, >, DeviceProperties.maxThreadsDim[1], "Block dimension 'y' too large.");
+    threadBlock.x = x;
+    threadBlock.y = y;
+    threadBlock.z = 1;
+#endif
+}
+
+// Configures a three-dimensional thread block of `x` by `y` by `z` threads.
+// The total thread count is validated against maxThreadsPerBlock (the per-block limit), not
+// against the x-extent limit; each extent is validated against maxThreadsDim.
+// Without CUDACC this does nothing.
+void Settings::setBlockDim(const size_t x, const size_t y, const size_t z) {
+#ifdef CUDACC
+    CT_ERROR_IF(x * y * z, >, DeviceProperties.maxThreadsPerBlock, "Total block size too large.");
+    CT_ERROR_IF(x, >, DeviceProperties.maxThreadsDim[0], "Block dimension 'x' too large.");
+    CT_ERROR_IF(y, >, DeviceProperties.maxThreadsDim[1], "Block dimension 'y' too large.");
+    CT_ERROR_IF(z, >, DeviceProperties.maxThreadsDim[2], "Block dimension 'z' too large.");
+    threadBlock.x = x;
+    threadBlock.y = y;
+    threadBlock.z = z;
+#endif
+}
+
+// Stores the requested shared-memory size, in bytes, in sharedMemoryBytes.
+// Without CUDACC this does nothing.
+void Settings::setSharedMemSize(const size_t bytes) {
+#ifdef CUDACC
+    sharedMemoryBytes = bytes;
+#endif
+}
+
+// Copies the id of `stream_` into this settings object's stream.
+// Without CUDACC this does nothing.
+void Settings::setStream(const StreamID& stream_) {
+#ifdef CUDACC
+    stream.id = stream_.id;
+#endif
+}
+
+// Builds launch settings for a one-dimensional launch covering at least `threads` threads.
+//
+// CUDA builds: picks the fewest blocks that fit within maxThreadsPerBlock, then spreads the
+// threads evenly over those blocks, and records the stream. A request for zero threads is
+// treated as a request for one, so the divisions below never divide by zero and the resulting
+// grid and block sizes are never zero.
+// Other builds: only the requested thread count is recorded.
+Settings basic(const size_t threads, const StreamID& stream) {
+    Settings sett;
+#ifdef CUDACC
+    const size_t work = (threads == 0) ? 1 : threads;
+    const size_t max_threads = DeviceProperties.maxThreadsPerBlock;
+    const size_t grid_blocks = (work + max_threads - 1) / max_threads;   // ceil(work / max_threads)
+    const size_t block_threads = (work + grid_blocks - 1) / grid_blocks; // ceil(work / grid_blocks)
+    sett.setGridDim(grid_blocks);
+    sett.setBlockDim(block_threads);
+    sett.setStream(stream);
+#else
+    sett.threads = threads;
+#endif
+    return sett;
+}
+} // namespace Kernel
+
+/////////////////////
+// Shape Functions //
+/////////////////////
+
+// Builds a shape from a list of axis dimensions.
+// Strides are first computed with the last axis varying fastest; for shapes with two or more
+// axes the strides of the last two axes are then swapped so that they are stored column major.
+// Raises an error when there are more than CUDATOOLS_ARRAY_MAX_AXES axes or any dimension is 0.
+HD Shape::Shape(const std::initializer_list<uint32_t> dims) : mAxes(dims.size()), mItems(1) {
+    CT_ERROR_IF(dims.size(), >, CUDATOOLS_ARRAY_MAX_AXES, "Number of axes exceeds max axes");
+    // mAxes and mItems already hold dims.size() and 1 from the initializer list.
+    if (mAxes == 0) return;
+
+    // Walk the axes from last to first, accumulating the running item count as the stride.
+    for (uint32_t remaining = mAxes; remaining > 0; --remaining) {
+        const uint32_t iAxis = remaining - 1;
+        uint32_t dim = dims.begin()[iAxis];
+        CT_ERROR_IF(dim, ==, 0, "Axis dimension cannot be 0");
+
+        mAxisDim[iAxis] = dim;
+        mStride[iAxis] = mItems;
+        mItems *= dim;
+    }
+
+    if (mAxes >= 2) {
+        // Swap last two, for column major storage.
+        mStride[mAxes - 2] = 1;
+        mStride[mAxes - 1] = mAxisDim[mAxes - 2];
+    }
+}
+
+// Number of axes.
+HD uint32_t Shape::axes() const {
+    return mAxes;
+}
+
+// Total number of items (product of all axis dimensions).
+HD uint32_t Shape::items() const {
+    return mItems;
+}
+
+// Dimension of the last axis.
+// NOTE(review): reads mAxisDim[mAxes - 1]; presumably requires at least one axis. Confirm.
+HD uint32_t Shape::length() const {
+    return mAxisDim[mAxes - 1];
+}
+
+// Dimension of the second-to-last axis.
+// NOTE(review): reads mAxisDim[mAxes - 2]; presumably requires at least two axes. Confirm.
+HD uint32_t Shape::rows() const {
+    return mAxisDim[mAxes - 2];
+}
+
+// Dimension of the last axis.
+HD uint32_t Shape::cols() const {
+    return mAxisDim[mAxes - 1];
+}
+
+// Dimension of the given axis (no bounds check).
+HD uint32_t Shape::dim(const uint32_t axis) const {
+    return mAxisDim[axis];
+}
+
+// Stride of the given axis (no bounds check).
+HD uint32_t Shape::stride(const uint32_t axis) const {
+    return mStride[axis];
+}
+
+// Two shapes are equal when they have the same number of axes and identical dimensions on
+// every axis. Strides are not compared.
+HD bool Shape::operator==(const Shape& s) const {
+    bool same = (mAxes == s.mAxes);
+    for (uint32_t iAxis = 0; same && iAxis < mAxes; ++iAxis) {
+        same = (mAxisDim[iAxis] == s.mAxisDim[iAxis]);
+    }
+    return same;
+}
+
+// Negation of operator==.
+HD bool Shape::operator!=(const Shape& s) const {
+    return !(*this == s);
+}
+
+// Returns the shape formed by dropping the first `axis` axes.
+// The remaining axes keep their dimensions and strides from this shape, and the item count is
+// divided by each dropped dimension. `axis == axes()` yields the shape (1).
+// Raises an error when `axis` is larger than the number of axes.
+HD Shape Shape::subshape(const uint32_t axis) const {
+    CT_ERROR_IF(axis, >, mAxes, "Axis number exceeds number of axes.");
+    if (axis == mAxes) return Shape({1});
+
+    Shape new_shape({});
+    new_shape.mAxes = mAxes - axis;
+
+    // Divide out the dimensions of the dropped leading axes.
+    auto remaining = mItems;
+    for (uint32_t iLead = 0; iLead < axis; ++iLead) {
+        remaining /= mAxisDim[iLead];
+    }
+    new_shape.mItems = remaining;
+
+    // Shift the kept axes down so they start at index 0.
+    for (uint32_t iSrc = axis, iDst = 0; iSrc < mAxes; ++iSrc, ++iDst) {
+        new_shape.mAxisDim[iDst] = mAxisDim[iSrc];
+        new_shape.mStride[iDst] = mStride[iSrc];
+    }
+    return new_shape;
+}
+
+// Prints the shape as a parenthesized, comma-separated list of dimensions, e.g. "(2, 3)".
+// A shape without axes prints as "()".
+std::ostream& operator<<(std::ostream& out, const Shape& s) {
+    const uint32_t nAxes = s.axes();
+    out << "(";
+    for (uint32_t iAxis = 0; iAxis < nAxes; ++iAxis) {
+        if (iAxis != 0) out << ", ";
+        out << s.dim(iAxis);
+    }
+    out << ")";
+    return out;
+}
+
+#ifdef CUDACC
+// Maps a cuBLAS status code to the name of its enumerator.
+// Returns "<unknown>" for any value that is not listed below.
+const char* cublasGetErrorString(cublasStatus_t error) {
+    switch (error) {
+    case CUBLAS_STATUS_SUCCESS:
+        return "CUBLAS_STATUS_SUCCESS";
+
+    case CUBLAS_STATUS_NOT_INITIALIZED:
+        return "CUBLAS_STATUS_NOT_INITIALIZED";
+
+    case CUBLAS_STATUS_ALLOC_FAILED:
+        return "CUBLAS_STATUS_ALLOC_FAILED";
+
+    case CUBLAS_STATUS_INVALID_VALUE:
+        return "CUBLAS_STATUS_INVALID_VALUE";
+
+    case CUBLAS_STATUS_ARCH_MISMATCH:
+        return "CUBLAS_STATUS_ARCH_MISMATCH";
+
+    case CUBLAS_STATUS_MAPPING_ERROR:
+        return "CUBLAS_STATUS_MAPPING_ERROR";
+
+    case CUBLAS_STATUS_EXECUTION_FAILED:
+        return "CUBLAS_STATUS_EXECUTION_FAILED";
+
+    case CUBLAS_STATUS_INTERNAL_ERROR:
+        return "CUBLAS_STATUS_INTERNAL_ERROR";
+
+    case CUBLAS_STATUS_NOT_SUPPORTED:
+        return "CUBLAS_STATUS_NOT_SUPPORTED";
+
+    case CUBLAS_STATUS_LICENSE_ERROR:
+        return "CUBLAS_STATUS_LICENSE_ERROR";
+    }
+
+    return "<unknown>";
+}
+#endif
+
+};     // namespace CudaTools
+#endif // CUDATOOLS_IMPLEMENTATION
+
+#endif // CUDATOOLS_H
diff --git a/Macros.h b/Macros.h
new file mode 100644
index 0000000..4ffce1a
--- /dev/null
+++ b/Macros.h
@@ -0,0 +1,297 @@
+#ifndef MACROS_H
+#define MACROS_H
+
+#include <exception>
+#include <sstream>
+#include <stdarg.h>
+
+// CUDACC: defined when the CUDA build flag (CUDA) is set and __CUDACC__ is defined.
+#if defined(CUDA) && defined(__CUDACC__)
+#define CUDACC
+#endif
+
+// DEVICE: defined when __CUDA_ARCH__ is defined and greater than zero.
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0)
+#define DEVICE
+#endif
+
+#ifdef CUDATOOLS_DOXYGEN
+/**
+ * \def CUDACC
+ * This macro is defined when this code is being compiled by nvcc and the CUDA compilation
+ * flag is set. This should be used to enclose code where CUDA specific libraries and syntax are
+ * being used.
+ */
+#define CUDACC
+
+/**
+ * \def DEVICE
+ * This macro is defined when this code is being compiled for the device. The difference between
+ * this and CUDACC is that this should exclusively be used to decide if code is being compiled
+ * to execute on the device. CUDACC only determines what compiler is being used.
+ */
+#define DEVICE
+
+/**
+ * \def HD
+ * Mark a function in front with this if it needs to be callable on both the
+ * CPU and CUDA device.
+ */
+#define HD
+
+/**
+ * \def SHARED
+ * Mark a variable as static shared memory.
+ */
+#define SHARED
+
+/**
+ * \def DECLARE_KERNEL(call, ...)
+ * Used to declare (in header) a CUDA kernel.
+ * \param call the name of the kernel
+ * \param ... the arguments of the kernel
+ */
+#define DECLARE_KERNEL(call, ...)
+
+/**
+ * \def DEFINE_KERNEL(call, ...)
+ * Used to define (in implementation) a CUDA kernel.
+ * \param call the name of the kernel
+ * \param ... the arguments of the kernel
+ */
+#define DEFINE_KERNEL(call, ...)
+
+/**
+ * \def KERNEL(call, settings, ...)
+ * Used to call a CUDA kernel.
+ * \param call the name of the kernel
+ * \param settings the associated CudaTools::Kernel::Settings to initialize the kernel with
+ * \param ... the arguments of the kernel
+ */
+#define KERNEL(call, settings, ...)
+
+/**
+ * \def BASIC_LOOP(N)
+ * Can be used in conjunction with CudaTools::Kernel::basic, which is mainly used for
+ * embarrassingly parallel situations. Exposes the loop/thread number as iThread.
+ * \param N number of iterations
+ */
+#define BASIC_LOOP(N)
+
+/**
+ * \def DEVICE_CLASS(name)
+ * Can be used inside a class declaration (header) which generates boilerplate code to allow this
+ * class to be used on the device.
+ *
+ * This macro creates a few functions:\n
+ * name* that(): returns the pointer to this instance on the device.
+ *
+ * void allocateDevice(): allocates the memory on the device for this class instance.
+ *
+ * CudaTools::StreamID updateHost(const CudaTools::StreamID& stream): updates the host instance
+ * of the class.
+ *
+ * CudaTools::StreamID updateDevice(const CudaTools::StreamID& stream): updates
+ * the device instance of the class.
+ * \param name the name of the class
+ */
+#define DEVICE_CLASS(name)
+
+/**
+ * \def CT_ERROR_IF(a, op, b, msg)
+ * Used for throwing runtime errors given a condition with an operator.
+ */
+#define CT_ERROR_IF(a, op, b, msg)
+
+/**
+ * \def CT_ERROR(a, msg)
+ * Used for throwing runtime errors given a bool.
+ */
+#define CT_ERROR(a, msg)
+
+/**
+ * \def CUDA_CHECK(call)
+ * Gets the error generated by a CUDA function call if there is one.
+ * \param call CUDA function to check if there are errors when running.
+ */
+#define CUDA_CHECK(call)
+
+/**
+ * \def CUBLAS_CHECK(call)
+ * Gets the error generated by a cuBLAS function call if there is one.
+ * \param call cuBLAS function to check if there are errors when running.
+ */
+#define CUBLAS_CHECK(call)
+
+/**
+ * \def CUDA_MEM(call, name)
+ * Gets the GPU memory used from function call if there is one.
+ * \param call function to measure memory usage.
+ * \param name an identifier to use as a variable and when printing. Must satisfy variable naming.
+ */
+#define CUDA_MEM(call, name)
+#endif
+
+///////////////////
+// KERNEL MACROS //
+///////////////////
+
+// CUDA build: kernels are __global__ functions and HD/SHARED map to the CUDA qualifiers.
+#ifdef CUDACC
+
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+
+#define HD __host__ __device__
+#define SHARED __shared__
+
+#define DECLARE_KERNEL(call, ...) __global__ void call(__VA_ARGS__)
+
+// Emits an explicit instantiation of CudaTools::runKernel for this kernel's argument list,
+// followed by the opening of the kernel definition itself.
+#define DEFINE_KERNEL(call, ...)                                                                   \
+    template CudaTools::StreamID CudaTools::runKernel(                                             \
+        void (*)(__VA_ARGS__), const CudaTools::Kernel::Settings&, __VA_ARGS__);                   \
+    __global__ void call(__VA_ARGS__)
+
+// Non-CUDA build: kernels are plain functions and HD/SHARED expand to nothing.
+#else
+#define HD
+#define SHARED
+
+#define DECLARE_KERNEL(call, ...) void call(__VA_ARGS__)
+
+// Same explicit instantiation as the CUDA variant, with a plain function definition.
+#define DEFINE_KERNEL(call, ...)                                                                   \
+    template CudaTools::StreamID CudaTools::runKernel(                                             \
+        void (*)(__VA_ARGS__), const CudaTools::Kernel::Settings&, __VA_ARGS__);                   \
+    void call(__VA_ARGS__)
+
+#endif // CUDACC
+
+// Launches a kernel through CudaTools::runKernel with the given settings and arguments.
+#define KERNEL(call, settings, ...) CudaTools::runKernel(call, settings, __VA_ARGS__)
+
+///////////////////
+// DEVICE MACROS //
+///////////////////
+
+// BASIC_LOOP(N) exposes the index iThread for N iterations.
+// Device: iThread is this thread's global index along x; the body runs only when iThread < N.
+// Host: an OpenMP-parallel for loop over [0, N).
+// N is parenthesized so that expressions with low-precedence operators (e.g. `a & b`,
+// `c ? x : y`) are compared as a whole.
+#ifdef DEVICE
+
+#define BASIC_LOOP(N)                                                                              \
+    uint32_t iThread = blockIdx.x * blockDim.x + threadIdx.x;                                      \
+    if (iThread < (N))
+#else
+#define BASIC_LOOP(N)                                                                              \
+    _Pragma("omp parallel for") for (uint32_t iThread = 0; iThread < (N); ++iThread)
+
+#endif
+
+//////////////////
+// CLASS MACROS //
+//////////////////
+
+// Generates updateHost()/updateDevice() for a DEVICE_CLASS.
+// updateHost pulls sizeof(name) bytes from that() into this; updateDevice pushes them the other
+// way. The `stream` argument is forwarded to pull/push so the copy runs on the requested stream
+// rather than always on the default one.
+#define UPDATE_FUNC(name)                                                                          \
+    inline CudaTools::StreamID updateHost(const CudaTools::StreamID& stream =                      \
+                                              CudaTools::DEF_MEM_STREAM) {                         \
+        return CudaTools::pull(this, that(), sizeof(name), stream);                                \
+    };                                                                                             \
+    inline CudaTools::StreamID updateDevice(const CudaTools::StreamID& stream =                    \
+                                                CudaTools::DEF_MEM_STREAM) {                       \
+        return CudaTools::push(this, that(), sizeof(name), stream);                                \
+    }
+
+// CUDA build: the class keeps a pointer to a separately allocated instance.
+// that() returns that pointer (nullptr until allocateDevice() is called) and allocateDevice()
+// obtains sizeof(name) bytes from CudaTools::malloc.
+// NOTE(review): calling allocateDevice() twice overwrites the previous pointer without freeing
+// it. Confirm callers allocate only once.
+#ifdef CUDA
+
+#define DEVICE_CLASS(name)                                                                         \
+  private:                                                                                         \
+    name* __deviceInstance__ = nullptr;                                                            \
+                                                                                                   \
+  public:                                                                                          \
+    inline name* that() { return __deviceInstance__; }                                             \
+    inline void allocateDevice() { __deviceInstance__ = (name*)CudaTools::malloc(sizeof(name)); }; \
+    UPDATE_FUNC(name)
+
+// Non-CUDA build: that() is the object itself and allocateDevice() does nothing.
+#else
+#define DEVICE_CLASS(name)                                                                         \
+  public:                                                                                          \
+    inline name* that() { return this; };                                                          \
+    inline void allocateDevice(){};                                                                \
+    UPDATE_FUNC(name)
+
+#endif
+
+#ifndef CUDATOOLS_ARRAY_MAX_AXES
+/**
+ * \def CUDATOOLS_ARRAY_MAX_AXES
+ * The maximum number of axes/dimensions a CudaTools::Array can have. The default is
+ * set to 4, but it can be manually set to fit the program's needs.
+ */
+#define CUDATOOLS_ARRAY_MAX_AXES 4
+#endif
+
+////////////////////
+// Error Checking //
+////////////////////
+
+// CT_ERROR_IF(a, op, b, msg): reports an error when `(a) op (b)` holds.
+// CT_ERROR(a, msg): reports an error when `a` holds.
+//
+// Device builds only print the message. Host builds print it (including the evaluated values
+// of `a` and `b` for CT_ERROR_IF) and then throw std::exception.
+// The stringified arguments are passed to printf through %s rather than pasted into the format
+// string, so an argument containing '%' cannot corrupt the format.
+// When NO_DIMENSION_CHECK is defined both macros expand to nothing, so code using them still
+// compiles with the checks removed.
+// The macros deliberately stay plain `if` blocks: existing call sites use them both with and
+// without a trailing semicolon.
+#ifndef NO_DIMENSION_CHECK
+#ifdef DEVICE
+#define CT_ERROR_IF(a, op, b, msg)                                                                 \
+    if ((a) op (b)) {                                                                              \
+        printf("[ERROR] %s:%d\n | %s: (%s) %s (%s).\n", __FILE__, __LINE__, msg, #a, #op, #b);     \
+    }
+
+#define CT_ERROR(a, msg)                                                                           \
+    if (a) {                                                                                       \
+        printf("[ERROR] %s:%d\n | %s: %s.\n", __FILE__, __LINE__, msg, #a);                        \
+    }
+#else
+
+#define CT_ERROR_IF(a, op, b, msg)                                                                 \
+    if ((a) op (b)) {                                                                              \
+        std::ostringstream os_a;                                                                   \
+        std::ostringstream os_b;                                                                   \
+        os_a << (a);                                                                               \
+        os_b << (b);                                                                               \
+        printf("[ERROR] %s:%d\n | %s: (%s)%s %s (%s)%s.\n", __FILE__, __LINE__, msg, #a,           \
+               os_a.str().c_str(), #op, #b, os_b.str().c_str());                                   \
+        throw std::exception();                                                                    \
+    }
+
+#define CT_ERROR(a, msg)                                                                           \
+    if (a) {                                                                                       \
+        printf("[ERROR] %s:%d\n | %s: %s.\n", __FILE__, __LINE__, msg, #a);                        \
+        throw std::exception();                                                                    \
+    }
+#endif
+
+#else // NO_DIMENSION_CHECK
+#define CT_ERROR_IF(a, op, b, msg)
+#define CT_ERROR(a, msg)
+#endif // NO_DIMENSION_CHECK
+
+#if defined(CUDACC) && !defined(NO_CUDA_CHECK)
+
+// Evaluates `call` once; if the result is not cudaSuccess, prints the file, line and CUDA error
+// string, then throws std::exception.
+#define CUDA_CHECK(call)                                                                           \
+    do {                                                                                           \
+        cudaError_t err = (call);                                                                  \
+        if (err != cudaSuccess) {                                                                  \
+            printf("[CUDA] %s:%d\n | %s\n", __FILE__, __LINE__, cudaGetErrorString(err));          \
+            throw std::exception();                                                                \
+        }                                                                                          \
+    } while (0)
+
+// Evaluates `call` once; if the result is not CUBLAS_STATUS_SUCCESS, prints the file, line and
+// the status name from CudaTools::cublasGetErrorString, then throws std::exception.
+#define CUBLAS_CHECK(call)                                                                         \
+    do {                                                                                           \
+        cublasStatus_t err = (call);                                                               \
+        if (err != CUBLAS_STATUS_SUCCESS) {                                                        \
+            printf("[cuBLAS] %s:%d\n | %s\n", __FILE__, __LINE__,                                  \
+                   CudaTools::cublasGetErrorString(err));                                          \
+            throw std::exception();                                                                \
+        }                                                                                          \
+    } while (0)
+
+// Runs `call`, synchronizes the device, and prints the drop in free GPU memory (in MiB) measured
+// around the call, labelled with `name`. Declares the locals free_bef_<name> and free_aft_<name>
+// in the enclosing scope, so each `name` can be used once per scope.
+// The difference is a size_t, so it is printed with %zu.
+#define CUDA_MEM(call, name)                                                                       \
+    size_t free_bef_##name, free_aft_##name;                                                       \
+    cudaMemGetInfo(&free_bef_##name, NULL);                                                        \
+    call;                                                                                          \
+    CudaTools::Manager::get()->sync();                                                             \
+    cudaMemGetInfo(&free_aft_##name, NULL);                                                        \
+    printf("[%s] GPU Memory Usage: %zuMiB\n", #name,                                               \
+           (free_bef_##name - free_aft_##name) / (1024 * 1024));
+
+#else
+#define CUDA_CHECK(call) (call)
+#define CUBLAS_CHECK(call) (call)
+#define CUDA_MEM(call, name) (call)
+#endif
+
+#endif // MACROS_H
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..d1cabb7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,95 @@
+# Host compiler, CUDA compiler, and the flags passed to each.
+# NOTE(review): NVCC_FLAGS ends with a bare -Xcompiler, which takes the next command-line
+# argument as its value (-DCUDA in the .cu.o rule). Confirm this is intended.
+CC := g++-10
+NVCC := nvcc
+CFLAGS := -Wall -std=c++17 -fopenmp -MMD
+NVCC_FLAGS := -MMD -w -Xcompiler
+
+INCLUDE :=
+LIBS_DIR :=
+LIBS_DIR_GPU := /usr/local/cuda/lib64
+LIBS :=
+LIBS_GPU := cuda cudart cublas
+
+TARGET = tests
+SRC_DIR = .
+BUILD_DIR = build
+
+# Should not need to modify below.
+
+CPU_BUILD_DIR = $(BUILD_DIR)/cpu
+GPU_BUILD_DIR = $(BUILD_DIR)/gpu
+
+SRC = $(wildcard $(SRC_DIR)/*/*.cpp) $(wildcard $(SRC_DIR)/*.cpp)
+
+# Get source files and object files.
+GCC_SRC = $(filter-out %.cu.cpp ,$(SRC))
+NVCC_SRC = $(filter %.cu.cpp, $(SRC))
+GCC_OBJ = $(GCC_SRC:$(SRC_DIR)/%.cpp=%.o)
+NVCC_OBJ = $(NVCC_SRC:$(SRC_DIR)/%.cpp=%.o)
+
+# If compiling for CPU, all go to GCC. Otherwise, they are split.
+CPU_OBJ = $(addprefix $(CPU_BUILD_DIR)/,$(GCC_OBJ)) $(addprefix $(CPU_BUILD_DIR)/,$(NVCC_OBJ))
+GPU_GCC_OBJ = $(addprefix $(GPU_BUILD_DIR)/,$(GCC_OBJ))
+GPU_NVCC_OBJ = $(addprefix $(GPU_BUILD_DIR)/,$(NVCC_OBJ))
+
+# $(info $$GCC_SRC is [${GCC_SRC}])
+# $(info $$NVCC_SRC is [${NVCC_SRC}])
+# $(info $$GCC_OBJ is [${GCC_OBJ}])
+# $(info $$NVCC_OBJ is [${NVCC_OBJ}])
+
+# $(info $$CPU_OBJ is [${CPU_OBJ}])
+# $(info $$GPU_GCC_OBJ is [${GPU_GCC_OBJ}])
+# $(info $$GPU_NVCC_OBJ is [${GPU_NVCC_OBJ}])
+
+HEADER = $(wildcard $(SRC_DIR)/*/*.h) $(wildcard $(SRC_DIR)/*.h)
+CPU_DEPS = $(wildcard $(CPU_BUILD_DIR)/*.d)
+GPU_DEPS = $(wildcard $(GPU_BUILD_DIR)/*.d)
+
+INC := $(INCLUDE:%=-I%)
+LIB := $(LIBS_DIR:%=-L%)
+LIB_GPU := $(LIBS_DIR_GPU:%=-L%)
+LD := $(LIBS:%=-l%)
+LD_GPU := $(LIBS_GPU:%=-l%)
+
+# Reminder:
+# $< = first prerequisite
+# $@ = the target which matched the rule
+# $^ = all prerequisites
+
+# cpu and gpu are aliases for the real binaries, not files, so they are phony as well.
+.PHONY: all cpu gpu clean
+
+all : cpu gpu
+
+cpu: $(TARGET)CPU
+gpu: $(TARGET)GPU
+
+# Link all CPU objects. $(LD) carries the -l flags derived from LIBS, as in the GPU link rule.
+$(TARGET)CPU: $(CPU_OBJ)
+	$(CC) $(CFLAGS) $^ -o $@ $(INC) $(LIB) $(LD) $(LDFLAGS)
+
+$(CPU_BUILD_DIR)/%.o $(CPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cpp | $(CPU_BUILD_DIR)
+	$(CC) $(CFLAGS) -c -o $@ $< $(INC)
+
+# For GPU, we need to build the NVCC objects, the NVCC linked object, and the
+# regular ones. Then, we link them all together.
+$(TARGET)GPU: $(GPU_BUILD_DIR)/link.o $(GPU_GCC_OBJ) | $(GPU_BUILD_DIR)
+	$(CC) -g -DCUDA $(CFLAGS) $(GPU_NVCC_OBJ) $^ -o $@ $(INC) $(LIB) $(LIB_GPU) $(LD) $(LD_GPU)
+
+$(GPU_BUILD_DIR)/link.o: $(GPU_NVCC_OBJ) | $(GPU_BUILD_DIR)
+	$(NVCC) --device-link $^ -o $@
+
+$(GPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cu.cpp | $(GPU_BUILD_DIR)
+	$(NVCC) $(NVCC_FLAGS) -DCUDA -x cu --device-c -o $@ $< $(INC)
+
+$(GPU_BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp | $(GPU_BUILD_DIR)
+	$(CC) $(CFLAGS) -g -DCUDA -c -o $@ $< $(INC)
+
+-include $(CPU_DEPS)
+-include $(GPU_DEPS)
+
+$(CPU_BUILD_DIR):
+	mkdir -p $@
+
+$(GPU_BUILD_DIR):
+	mkdir -p $@
+
+clean:
+	rm -Rf $(BUILD_DIR) $(TARGET)CPU $(TARGET)GPU
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..24b9cfb
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,40 @@
+=========
+CudaTools
+=========
+This is the documentation for CudaTools, a header-only library and framework
+for the development of CPU-CUDA compatible applications. Using CudaTools enables
+the creation of a single unified code that has both CPU and CUDA compilation targets with minimal need to
+introduce ``#ifdef`` statements when code is essentially identical between the targets.
+
+For information on the library itself and its usage, see the `documentation <https://acem.ece.illinois.edu/code/CudaTools>`__. The small code snippets and samples
+shown in the documentation are in the ``samples`` folder.
+
+Dependencies
+============
+- Eigen
+
+In the future, we will make this dependency optional, but still provide support
+for it. As of now, it is necessary.
+
+Building the Documentation
+==========================
+The documentation is built with `Doxygen <https://doxygen.nl/>`__ and `Sphinx <https://www.sphinx-doc.org/en>`__.
+So, first make sure you have Doxygen installed on your system, and make sure it is added
+to your system path. Then, you will have to create a Python virtual environment
+in the repository folder
+
+.. code-block:: bash
+
+    $ python3 -m venv .venv
+
+After installing the required Python packages
+
+.. code-block:: bash
+
+    $ pip install -r requirements
+
+you can now run the script
+
+.. code-block:: bash
+
+    $ ./build_docs
diff --git a/build_docs b/build_docs
new file mode 100644
index 0000000..2c8fd33
--- /dev/null
+++ b/build_docs
@@ -0,0 +1,2 @@
+#!/bin/sh
+# Build the documentation: Doxygen first, then Sphinx. Abort on the first failing step.
+set -e
+doxygen docs/Doxyfile
+sphinx-build -b html docs/source docs/build/html
diff --git a/docs/Doxyfile b/docs/Doxyfile
new file mode 100644
index 0000000..1e208e6
--- /dev/null
+++ b/docs/Doxyfile
@@ -0,0 +1,2579 @@
+# Doxyfile 1.8.17
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the configuration
+# file that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME           = "CudaTools"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER         = "0.0.1"
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          =
+
+# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
+# in the documentation. The maximum height of the logo should not exceed 55
+# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
+# the logo to the output directory.
+
+PROJECT_LOGO           =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = "docs/build"
+
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise cause
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS         = NO
+
+# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
+# characters to appear in the names of generated files. If set to NO, non-ASCII
+# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
+# U+3044.
+# The default value is: NO.
+
+ALLOW_UNICODE_NAMES    = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE        = English
+
+# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all generated output in the proper direction.
+# Possible values are: None, LTR, RTL and Context.
+# The default value is: None.
+
+OUTPUT_TEXT_DIRECTION  = None
+
+# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
+# description of a member or function before the detailed description
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity): The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
+# before files name in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used
+# The default value is: YES.
+
+FULL_PATH_NAMES        = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
+# such as
+# /***************
+# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
+# Javadoc-style will behave just like regular comments and it will not be
+# interpreted by doxygen.
+# The default value is: NO.
+
+JAVADOC_BANNER         = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that rational rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
+# page for each member. If set to NO, the documentation of a member will be part
+# of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE               = 4
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines (in the resulting output). You can put ^^ in the value part of an
+# alias to insert a newline as if a physical newline was in the original file.
+# When you need a literal { or } or , in the value part of an alias you have to
+# escape them by means of a backslash (\), this can lead to conflicts with the
+# commands \{ and \} for these it is advised to use the version @{ and @} or use
+# a double escape (\\{ and \\})
+
+ALIASES                =
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding "class=itcl::class"
+# will allow you to use the command class in the itcl::class meaning.
+
+TCL_SUBST              =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice
+# sources only. Doxygen will then generate output that is more tailored for that
+# language. For instance, namespaces will be presented as modules, types will be
+# separated into more groups, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_SLICE  = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
+# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice,
+# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
+# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser
+# tries to guess whether the code is fixed or free formatted code, this is the
+# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat
+# .inc files as Fortran files (default is PHP), and .f files as C (default is
+# Fortran), use: inc=Fortran f=C.
+#
+# Note: For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See https://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibility issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT       = YES
+
+# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up
+# to that level are automatically included in the table of contents, even if
+# they do not have an id attribute.
+# Note: This feature currently applies only to Markdown headings.
+# Minimum value: 0, maximum value: 99, default value: 5.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
+
+TOC_INCLUDE_HEADINGS   = 5
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen replace the get and set methods with a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# If one adds a struct or class to a group and this option is enabled, then also
+# any nested class or struct is added to the same group. By default this option
+# is disabled and one has to add nested compounds explicitly via \ingroup.
+# The default value is: NO.
+
+GROUP_NESTED_COMPOUNDS = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
+# methods of a class will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIV_VIRTUAL   = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO,
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. If set to YES, local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO, only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespace
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO, these classes will be included in the various overviews. This option
+# has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# declarations. If set to NO, these declarations will be included in the
+# documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO, these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES, upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# (including Cygwin) and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES, the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
+# append additional text to a page's title, such as Class Reference. If set to
+# YES the compound reference will be hidden.
+# The default value is: NO.
+
+HIDE_COMPOUND_REFERENCE= NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC  = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
+# list. This list is created by putting \todo commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command "<command> <input-file>", where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output is used as
+# the file version. For an example see the documentation.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE            =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. See also \cite for info how to create references.
+
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS               = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO, doxygen will only warn about wrong or incomplete
+# parameter documentation, but not about the absence of documentation. If
+# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC       = NO
+
+# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
+# a warning is encountered.
+# The default value is: NO.
+
+WARN_AS_ERROR          = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING.
+# Note: If this tag is empty the current directory is searched.
+
+INPUT                  = "./"
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
+# possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# read by doxygen.
+#
+# If left blank the following patterns are tested: *.c, *.cc, *.cxx, *.cpp,
+# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
+# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
+# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
+# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
+# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd,
+# *.vhdl, *.ucf, *.qsf and *.ice.
+
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.c++ \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.idl \
+                         *.ddl \
+                         *.odl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.cs \
+                         *.d \
+                         *.php \
+                         *.php4 \
+                         *.php5 \
+                         *.phtml \
+                         *.inc \
+                         *.m \
+                         *.markdown \
+                         *.md \
+                         *.mm \
+                         *.dox \
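+                         *.doc
+
+# *.txt is left out of the patterns. This note is kept between two assignments
+# (rather than inside the backslash-continued list) so that it cannot interrupt
+# the line continuation; the remaining patterns are appended with "+=".
+
+FILE_PATTERNS         += *.py \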
+                         *.pyw \
+                         *.f90 \
+                         *.f95 \
+                         *.f03 \
+                         *.f08 \
+                         *.f \
+                         *.for \
+                         *.tcl \
+                         *.vhd \
+                         *.vhdl \
+                         *.ucf \
+                         *.qsf \
+                         *.ice
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# ANamespace::AClass, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# entity all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS        = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see https://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS       = YES
+
+# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the
+# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
+# cost of reduced performance. This can be particularly helpful with template
+# rich C++ code for which doxygen's built-in parser lacks the necessary type
+# information.
+# Note: The availability of this option depends on whether or not doxygen was
+# generated with the -Duse_libclang=ON option for CMake.
+# The default value is: NO.
+
+CLANG_ASSISTED_PARSING = NO
+
+# If clang assisted parsing is enabled you can provide the compiler with command
+# line options that you would normally use when invoking the compiler. Note that
+# the include paths will already be set by doxygen for the files and directories
+# specified with INPUT and INCLUDE_PATH.
+# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
+
+CLANG_OPTIONS          =
+
+# If clang assisted parsing is enabled you can provide the clang parser with the
+# path to the compilation database (see:
+# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files
+# were built. This is equivalent to specifying the "-p" option to a clang tool,
+# such as clang-check. These options will then be passed to the parser.
+# Note: The availability of this option depends on whether or not doxygen was
+# generated with the -Duse_libclang=ON option for CMake.
+
+CLANG_DATABASE_PATH    =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX     = YES
+
+# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
+# which the alphabetical index list will be split.
+# Minimum value: 1, maximum value: 20, default value: 5.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output
+# The default value is: YES.
+
+GENERATE_HTML          = NO
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML, the header file must include any scripts and style sheets
+# that doxygen needs, which depend on the configuration options used (e.g. the
+# setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET        =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# cascading style sheets that are included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list). For an example see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET  =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES       =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the style sheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to YES can help to show when doxygen was last run and thus if the
+# documentation is up to date.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP         = NO
+
+# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
+# documentation will contain a main index with vertical navigation menus that
+# are dynamically created via JavaScript. If disabled, the navigation index will
+# consist of multiple levels of tabs that are statically embedded in every HTML
+# page. Disable this option to support browsers that do not have JavaScript,
+# like the Qt help browser.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_MENUS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries to 1 will produce a fully collapsed tree by default. 0 is a special
+# value representing an infinite number of entries and will result in a fully
+# expanded tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see: https://developer.apple.com/xcode/), introduced with OSX
+# 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# Makefile in the HTML output directory. Running make will produce the docset in
+# that directory and running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See
+# https://developer.apple.com/library/archive/featuredarticles/DoxygenXcode/_index.html
+# for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET        = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP      = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE               =
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty,
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION           =
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the master .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI           = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING     =
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
+# enables the Previous and Next buttons.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
+# Help Project output. For more information please see Qt Help Project / Virtual
+# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
+# folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# The QHG_LOCATION tag can be used to specify the location of Qt's
+# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
+# generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated, together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW      = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH         = 250
+
+# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT    = YES
+
+# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
+# to create new LaTeX commands to be used in formulas as building blocks. See
+# the section "Including formulas" for details.
+
+FORMULA_MACROFILE      =
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# https://www.mathjax.org) which uses client side JavaScript for the rendering
+# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want the formulas to look prettier in the HTML output.
+# When enabled you may also need to install MathJax separately and configure the
+# path to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from https://www.mathjax.org before deployment.
+# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS     =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE       =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using JavaScript. There
+# are two flavors of web server based searching depending on the EXTERNAL_SEARCH
+# setting. When disabled, doxygen will generate a PHP script for searching and
+# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
+# and searching needs to be provided by external tools. See the section
+# "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH    = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: https://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: https://xapian.org/). See the section "External Indexing and
+# Searching" for details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL       =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID     =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
+# to a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when USE_PDFLATEX is not enabled the default is latex; when
+# USE_PDFLATEX is enabled the default is pdflatex, and if latex is chosen in
+# the latter case it is overridden by pdflatex. For specific output languages
+# the default may have been set differently; this depends on the implementation
+# of the output language.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME         =
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# Note: This tag is used in the Makefile / make.bat.
+# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file
+# (.tex).
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
+# generate index for LaTeX. In case there is no backslash (\) as first character
+# it will be automatically added in the LaTeX code.
+# Note: This tag is used in the generated output file (.tex).
+# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
+# The default value is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_MAKEINDEX_CMD    = makeindex
+
+# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. The package can be specified just
+# by its name or with the correct syntax as to be used with the LaTeX
+# \usepackage command. To get the times font, for instance, you can specify:
+# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
+# To use the option intlimits with the amsmath package you can specify:
+# EXTRA_PACKAGES=[intlimits]{amsmath}
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber,
+# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
+# string, for the replacement values of the other commands the user is referred
+# to HTML_HEADER.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER           =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer. See
+# LATEX_HEADER for more information on how to generate a default footer and what
+# special commands can be used inside the footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER           =
+
+# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# LaTeX style sheets that are included after the standard style sheets created
+# by doxygen. Using this option one can overrule certain style aspects. Doxygen
+# will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_STYLESHEET =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES      =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
+# the PDF file directly from the LaTeX files. Set this option to YES, to get a
+# higher quality PDF documentation.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE        = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES     = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE        = plain
+
+# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_TIMESTAMP        = NO
+
+# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
+# path from which the emoji images will be read. If a relative path is entered,
+# it will be relative to the LATEX_OUTPUT directory. If left blank the
+# LATEX_OUTPUT directory will be used.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EMOJI_DIRECTORY  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# configuration file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's configuration file. A template extensions file can be
+# generated using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE    =
+
+# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
+# with syntax highlighting in the RTF output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_SOURCE_CODE        = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION          = .3
+
+# The MAN_SUBDIR tag determines the name of the directory created within
+# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by
+# MAN_EXTENSION with the initial . removed.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_SUBDIR             =
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML           = YES
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT             = "xml"
+
+# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING     = YES
+
+# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include
+# namespace members in file scope as well, matching the HTML output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_NS_MEMB_FILE_SCOPE = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT         = docbook
+
+# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
+# program listings (including syntax highlighting and cross-referencing
+# information) to the DOCBOOK output. Note that enabling this will significantly
+# increase the size of the DOCBOOK output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_PROGRAMLISTING = NO
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
+# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# the structure of the code including all documentation. Note that this feature
+# is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO, the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
+# in the source code. If set to NO, only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION        = YES
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF     = YES
+
+# If the SEARCH_INCLUDES tag is set to YES, the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED             = CUDATOOLS_DOXYGEN
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED      = HD
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have
+# an all uppercase name, and do not end with a semicolon. Such function macros
+# are typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES, all external class will be listed in
+# the class index. If set to NO, only the inherited external classes will be
+# listed.
+# The default value is: NO.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS        = YES
+
+# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES         = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS         = YES
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH               =
+
+# If set to YES the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO
+# The default value is: YES.
+
+HAVE_DOT               = YES
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS        = 0
+
+# When you want a differently looking font in the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH          = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command. Disabling a call graph can be
+# accomplished by means of the command \hidecallgraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command. Disabling a caller graph can be
+# accomplished by means of the command \hidecallergraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a
+# graphical hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. For an explanation of the image formats see the section
+# output formats in the documentation of the dot tool (Graphviz (see:
+# http://www.graphviz.org/)).
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd,
+# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo,
+# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo,
+# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
+# png:gdiplus:gdiplus.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG        = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS           =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           =
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS           =
+
+# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
+# path where java can find the plantuml.jar file. If left blank, it is assumed
+# PlantUML is not used or called during a preprocessing step. Doxygen will
+# generate a warning when it encounters a \startuml command in this case and
+# will not generate output for the diagram.
+
+PLANTUML_JAR_PATH      =
+
+# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a
+# configuration file for plantuml.
+
+PLANTUML_CFG_FILE      =
+
+# When using plantuml, the specified paths are searched for files specified by
+# the !include statement in a plantuml block.
+
+PLANTUML_INCLUDE_PATH  =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lay
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# files that are used to generate the various graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_CLEANUP            = YES
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d0c3cbf
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+# Every target is forwarded to sphinx-build in make mode (-M), with source/ as input and build/ as output.
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..6247f7e
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+REM Work from the directory that holds this script; popd at the end undoes the pushd.
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+REM Usage: make.bat TARGET. Without a target, the Sphinx help text is printed.
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+REM Probe that the Sphinx command can be started at all; its output is discarded.
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/source/_static/js/mathjax-config.js b/docs/source/_static/js/mathjax-config.js
new file mode 100644
index 0000000..be85a45
--- /dev/null
+++ b/docs/source/_static/js/mathjax-config.js
@@ -0,0 +1,39 @@
+const sections = {  // Equation-number prefix per page, keyed by the HTML file name without its extension.
+  "mesh_prep": 1,
+  "matrix_assembly": 2,
+  "bc_calc": 3,
+  "timestep": 4,
+};  // NOTE(review): no page in this docs tree (index, usage, core, array, blas) uses one of these names; confirm.
+
+
+window.MathJax = {  // Global configuration object for MathJax 3 (the script named by mathjax_path in conf.py).
+  loader: {load: ['[tex]/tagformat', '[tex]/ams']},
+  tex: {
+    packages: {'[+]': ['tagformat', 'ams']},
+    macros: {  // TeX shorthands usable in every math block; array entries are [expansion, argument count].
+      dd: "{\\, \\mathrm{d}}",
+      E: "{\\mathbf{E}}",
+      H: "{\\mathbf{H}}",
+      J: "{\\mathbf{J}}",
+      D: "{\\mathbf{D}}",
+      B: "{\\mathbf{B}}",
+      M: "{\\mathbf{M}}",
+      tbE: "{\\tilde{\\E}}",
+      tbH: "{\\tilde{\\H}}",
+      tE: "{\\tilde{E}}",
+      tH: "{\\tilde{H}}",
+      tphi: "{\\tilde{\\phi}}",
+      curl: ["{\\nabla \\times {#1}}", 1],
+      div: ["{\\nabla \\cdot {#1}}", 1],
+      tens: ["{\\bar{\\bar{{#1}}}}", 1],
+    },
+    tags: 'ams',
+    tagformat: {  // "<section>.<n>" on pages listed in `sections`; plain "<n>" elsewhere instead of "undefined.<n>".
+      number: (n) => {
+        const section = sections[window.location.pathname.split("/").pop().split(".")[0]];
+        return section === undefined ? String(n) : section + '.' + n;
+      },
+    },
+    ams: {multilineWidth: '100%', multilineIndent: '50em'},
+  },
+}
diff --git a/docs/source/array.rst b/docs/source/array.rst
new file mode 100644
index 0000000..53c87e2
--- /dev/null
+++ b/docs/source/array.rst
@@ -0,0 +1,26 @@
+=======
+Array.h
+=======
+
+The ``Array.h`` header file contains the Array class, and its related classes. For this
+file only, assume that every function is callable on both host and device unless
+explicitly mentioned otherwise.
+
+CudaTools::Shape
+----------------
+.. doxygenclass:: CudaTools::Shape
+    :members:
+    :allow-dot-graphs:
+
+CudaTools::ArrayIterator<T>
+---------------------------
+.. doxygenclass:: CudaTools::ArrayIterator
+    :members:
+    :allow-dot-graphs:
+
+CudaTools::Array<T>
+-------------------
+.. doxygenclass:: CudaTools::Array
+    :members:
+    :private-members:
+    :allow-dot-graphs:
diff --git a/docs/source/blas.rst b/docs/source/blas.rst
new file mode 100644
index 0000000..28235f0
--- /dev/null
+++ b/docs/source/blas.rst
@@ -0,0 +1,45 @@
+======
+BLAS.h
+======
+
+The ``BLAS.h`` header file contains some BLAS functions, and some related
+classes for those functions.
+
+BLAS Functions
+==============
+Currently, these are the supported BLAS functions. They are inherited mainly
+from the cuBLAS API, and condensed into unified functions. The plan is to
+add them as necessary.
+
+CudaTools::BLAS::GEMV<T>
+------------------------
+.. doxygenfunction:: CudaTools::BLAS::GEMV
+
+CudaTools::BLAS::GEMM<T>
+------------------------
+.. doxygenfunction:: CudaTools::BLAS::GEMM
+
+CudaTools::BLAS::DGMM<T>
+------------------------
+.. doxygenfunction:: CudaTools::BLAS::DGMM
+
+BLAS Classes
+============
+
+These classes also inherit functions from the cuBLAS API, but are packaged
+into classes that are more intuitive and hide external details.
+
+CudaTools::BLAS::Batch<T>
+-------------------------
+.. doxygenclass:: CudaTools::BLAS::Batch
+    :members:
+
+CudaTools::BLAS::PLUArray<T>
+----------------------------
+.. doxygenclass:: CudaTools::BLAS::PLUArray
+    :members:
+
+CudaTools::BLAS::PLUBatch<T>
+----------------------------
+.. doxygenclass:: CudaTools::BLAS::PLUBatch
+    :members:
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..afce172
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,53 @@
+# Configuration file for the Sphinx documentation builder.
+
+# -- Project information
+
+project = 'CudaTools'  # Name shown in the generated pages; index.rst documents CudaTools.
+copyright = '2022'
+author = 'Kenneth Jao, Qi Jian Lim'
+
+release = '0.1.0'  # Full version string.
+version = '0.1'  # Short X.Y version.
+
+# -- General configuration
+
+html_static_path = ["_static"]
+html_js_files = ["js/mathjax-config.js"]  # MathJax macros and equation-tag format, served from _static.
+
+extensions = [
+    'sphinx.ext.duration',
+    'sphinx.ext.doctest',
+    'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.autosectionlabel',
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.mathjax',
+    'sphinx.ext.graphviz',
+    'sphinxcontrib.bibtex',
+    'breathe',
+]
+
+breathe_projects = {"CudaTools": "../build/xml"}  # Breathe project name -> directory of Doxygen XML output.
+breathe_default_project = "CudaTools"  # Used by the doxygen* directives, none of which name a project.
+
+bibtex_bibfiles = ['refs.bib']
+
+mathjax_path = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"
+
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/3/', None),
+    'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
+}
+intersphinx_disabled_domains = ['std']
+
+templates_path = ['_templates']
+
+# -- Options for HTML output
+
+html_theme = 'sphinx_rtd_theme'
+html_theme_options = {
+    'collapse_navigation': False,
+}
+
+# -- Options for EPUB output
+epub_show_urls = 'footnote'
diff --git a/docs/source/core.rst b/docs/source/core.rst
new file mode 100644
index 0000000..d6e2874
--- /dev/null
+++ b/docs/source/core.rst
@@ -0,0 +1,67 @@
+======
+Core.h
+======
+
+The ``Core.h`` header file defines several compiler flags and macros along with
+a few core classes.
+
+Flags
+=====
+
+Device Indicators
+-----------------
+.. doxygendefine:: CUDACC
+.. doxygendefine:: DEVICE
+
+Host-Device Automation
+----------------------
+.. doxygendefine:: HD
+.. doxygendefine:: SHARED
+
+Compilation Options
+-------------------
+.. doxygendefine:: CUDATOOLS_ARRAY_MAX_AXES
+
+Macros
+======
+
+Kernel
+------
+.. doxygendefine:: DECLARE_KERNEL
+.. doxygendefine:: DEFINE_KERNEL
+.. doxygendefine:: KERNEL
+
+Device Helpers
+--------------
+
+.. doxygendefine:: BASIC_LOOP
+
+Device Class
+------------
+
+.. doxygendefine:: DEVICE_CLASS
+
+
+Classes and Structs
+===================
+
+CudaTools::StreamID
+-------------------
+
+.. doxygenstruct:: CudaTools::StreamID
+
+CudaTools::Manager
+------------------
+
+.. doxygenclass:: CudaTools::Manager
+    :members:
+
+CudaTools::Kernel::Settings
+---------------------------
+
+.. doxygenstruct:: CudaTools::Kernel::Settings
+    :members:
+
+CudaTools::Kernel::Basic
+------------------------
+.. doxygenfunction:: CudaTools::Kernel::basic
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..0880826
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,25 @@
+=========
+CudaTools
+=========
+This is the documentation for CudaTools, a header-only library and framework
+for the development of CPU-CUDA compatible applications. Using CudaTools enables
+the creation of a single unified code that has both CPU and CUDA compilation targets with minimal need to
+introduce ``#ifdef`` statements when code is essentially identical between the targets.
+
+To get started, please head over to the :doc:`usage` section. For more detail on the
+machinery underneath, please refer to the other sections.
+
+.. note::
+
+   If you would like to contribute, please visit the `git page <https://git.acem.ece.illinois.edu/kjao/CudaTools>`__.
+
+Contents
+========
+
+.. toctree::
+   :maxdepth: 2
+
+   usage
+   core
+   array
+   blas
diff --git a/docs/source/refs.bib b/docs/source/refs.bib
new file mode 100644
index 0000000..e69de29
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
new file mode 100644
index 0000000..60807a6
--- /dev/null
+++ b/docs/source/usage.rst
@@ -0,0 +1,128 @@
+==================
+Usage and Examples
+==================
+
+
+This library is broken up into three main parts, as well as a certain
+compilation and linking framework:
+
+#. :ref:`Core Examples`
+#. :ref:`Array Examples`
+#. :ref:`BLAS Examples`
+#. :ref:`Compilation and Linking`
+
+The ``Core.h`` header contains the necessary macros, flags and objects for interfacing with
+basic kernel launching and the CUDA Runtime API. The ``Array.h`` header contains the ``CudaTools::Array``
+class which provides a device compatible Array-like class with easy memory management. Next,
+the ``BLAS.h`` header provides BLAS functions through the cuBLAS library for the GPU,
+and Eigen for the CPU. Lastly, a templated Makefile is provided which can be used
+for your own project, after following a few rules.
+
+The usage of this library will be illustrated through examples, and further details
+can be found in the other sections. The examples are given in the `samples <https://git.acem.ece.illinois.edu/kjao/CudaTools/src/branch/main/samples>`__ folder.
+Throughout this documentation, there are a few common terms that may appear. First, we refer to the CPU as the host, and the GPU as the device. So, a host function refers
+to a function runnable on the CPU, and a device function refers to a function that is runnable
+on a device. A kernel is a specific function that the host can call to be run on the device.
+
+Core Examples
+=============
+This file mainly introduces compiler macros and a few classes that are used to improve the
+syntax between host and device code. To define and call a kernel, there are a few
+macros provided. For example,
+
+.. code-block:: cpp
+
+    DEFINE_KERNEL(add, int x, int y) { // Kernel named add; x and y are passed by value.
+        printf("Kernel: %i\n", x + y);
+    }
+
+    int main() {
+        KERNEL(add, CudaTools::Kernel::basic(1), 1, 1).wait(); // Prints 2; wait() blocks until the kernel is done.
+        return 0;
+    }
+
+The ``DEFINE_KERNEL(name, ...)`` macro takes in the function name and its arguments.
+The second argument in the ``KERNEL()`` macro is the launch parameters for the
+kernel. The launch parameters have several items, but for 'embarrassingly parallel'
+cases, we can simply generate the settings with the number of threads. More detail on
+creating launch parameters can be found :ref:`here <CudaTools::Kernel::Settings>`. In the above example,
+there is only one thread. The rest of the arguments are just the kernel arguments. For more detail,
+see :ref:`here <Macros>`.
+
+.. warning::
+   These kernel definitions must be in a file that will be compiled by ``nvcc``. Also,
+   for header files, there is an additional macro ``DECLARE_KERNEL(name, ...)`` to declare it
+   and make it available to other files.
+
+Since many applications use classes, a macro is provided to 'convert' a class into
+being device-compatible. Following the previous example similarly,
+
+.. code-block:: cpp
+
+    class intPair {
+        DEVICE_CLASS(intPair) // Introduces functions such as allocateDevice(), updateDevice(), updateHost() and that().
+        public:
+            int x, y;
+
+            intPair(const int x_, const int y_) : x(x_), y(y_) {
+                allocateDevice(); // Allocates memory for this intPair on the device.
+                updateDevice().wait(); // Copies the memory on the host to the device and waits until finished.
+            };
+
+            HD void swap() { // HD makes this member function available on the device as well as the host.
+                int swap = x;
+                x = y;
+                y = swap;
+            };
+    };
+
+    DEFINE_KERNEL(swap, intPair* const pair) { pair->swap(); } // Takes a pointer to the pair; main() passes the device copy.
+
+    int main() {
+        intPair pair(1, 2);
+        printf("Before: %i, %i\n", pair.x, pair.y); // Prints 1, 2. x and y are int, hence %i.
+
+        KERNEL(swap, CudaTools::Kernel::basic(1), pair.that()).wait(); // Swaps the device copy, not the host object.
+        pair.updateHost().wait(); // Copies the memory from the device back to the host and waits until finished.
+
+        printf("After: %i, %i\n", pair.x, pair.y); // Prints 2, 1.
+        return 0;
+    }
+
+In this example, we create a class called ``intPair``, which is then made available on the device through
+the ``DEVICE_CLASS(name)`` macro. Specifically, that macro introduces a few functions, like
+``allocateDevice()``, ``updateDevice()``, ``updateHost()``, and ``that()``. That last function
+returns a pointer to the copy on the device. For more details, see :ref:`here <Device Class>`. If we were to pass in the host pointer of the ``intPair`` to the kernel, there would be an illegal memory access.
+
+The kernel argument list **must** consist of pointers to objects or non-reference objects.
+Otherwise, compilation will fail. In general this is safer, as it forces the programmer to
+acknowledge that the device copy is being passed. For the latter case of a non-reference object,
+you should only do this if there is no issue in creating a copy of the original object. In the above
+example, we could have done this, but for more complicated classes it may result in unwanted behavior.
+
+Lastly, since the point of classes is usually to have some member functions, to have them
+available on the device, you must mark them with the compiler macro ``HD`` in front.
+
+We also introduce the ``wait()`` function, which waits for the command to complete before
+continuing. Most calls that involve the device are asynchronous, so without proper blocking,
+operations dependent on a previous command are not guaranteed to run correctly. If the code is
+compiled for CPU, then everything will run synchronously, as per usual.
+
+.. note::
+   Almost all functions that are asynchronous provide an optional 'stream' argument,
+   where you can give the name of the stream you wish to use. Different streams run
+   asynchronously, but operations on the same stream are FIFO. To define a stream to use
+   later, you must call ``CudaTools::Manager::get()->addStream("myStream")`` at some point
+   before you use it. For more details, see :ref:`here <CudaTools::Manager>`.
+
+
+Array Examples
+==============
+
+
+BLAS Examples
+=============
+
+
+Compilation and Linking
+=======================
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..30ce9e6
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+Sphinx>=5.1.1
+sphinx-rtd-theme>=1.0.0
+sphinxcontrib-bibtex>=2.5.0
+breathe>=4.34.0
diff --git a/samples/1_CoreKernel/Makefile b/samples/1_CoreKernel/Makefile
new file mode 100644
index 0000000..09436ea
--- /dev/null
+++ b/samples/1_CoreKernel/Makefile
@@ -0,0 +1,95 @@
+CC := g++-10
+NVCC := nvcc
+CFLAGS := -Wall -std=c++17 -fopenmp -MMD
+NVCC_FLAGS := -MMD -w -Xcompiler
+# NOTE(review): NVCC_FLAGS ends with -Xcompiler, which takes the next argument (-DCUDA in the .cu.o rule) - confirm.
+INCLUDE := ../../
+LIBS_DIR :=
+LIBS_DIR_GPU := /usr/local/cuda/lib64
+LIBS :=
+LIBS_GPU := cuda cudart cublas
+# Name stem of the executables ($(TARGET)CPU and $(TARGET)GPU), source root, and build output root.
+TARGET = coreKernel
+SRC_DIR = .
+BUILD_DIR = build
+
+# Should not need to modify below.
+
+CPU_BUILD_DIR = $(BUILD_DIR)/cpu
+GPU_BUILD_DIR = $(BUILD_DIR)/gpu
+
+SRC = $(wildcard $(SRC_DIR)/*/*.cpp) $(wildcard $(SRC_DIR)/*.cpp)
+
+# Get source files and object files.
+GCC_SRC = $(filter-out %.cu.cpp ,$(SRC))
+NVCC_SRC = $(filter %.cu.cpp, $(SRC))
+GCC_OBJ = $(GCC_SRC:$(SRC_DIR)/%.cpp=%.o)
+NVCC_OBJ = $(NVCC_SRC:$(SRC_DIR)/%.cpp=%.o)
+
+# If compiling for CPU, all go to GCC. Otherwise, they are split.
+CPU_OBJ = $(addprefix $(CPU_BUILD_DIR)/,$(GCC_OBJ)) $(addprefix $(CPU_BUILD_DIR)/,$(NVCC_OBJ))
+GPU_GCC_OBJ = $(addprefix $(GPU_BUILD_DIR)/,$(GCC_OBJ))
+GPU_NVCC_OBJ = $(addprefix $(GPU_BUILD_DIR)/,$(NVCC_OBJ))
+
+# $(info $$GCC_SRC is [${GCC_SRC}])
+# $(info $$NVCC_SRC is [${NVCC_SRC}])
+# $(info $$GCC_OBJ is [${GCC_OBJ}])
+# $(info $$NVCC_OBJ is [${NVCC_OBJ}])
+
+# $(info $$CPU_OBJ is [${CPU_OBJ}])
+# $(info $$GPU_GCC_OBJ is [${GPU_GCC_OBJ}])
+# $(info $$GPU_NVCC_OBJ is [${GPU_NVCC_OBJ}])
+
+HEADER = $(wildcard $(SRC_DIR)/*/*.h) $(wildcard $(SRC_DIR)/*.h)
+CPU_DEPS = $(wildcard $(CPU_BUILD_DIR)/*.d)
+GPU_DEPS = $(wildcard $(GPU_BUILD_DIR)/*.d)
+
+INC := $(INCLUDE:%=-I%)
+LIB := $(LIBS_DIR:%=-L%)
+LIB_GPU := $(LIBS_DIR_GPU:%=-L%)
+LD := $(LIBS:%=-l%)
+LD_GPU := $(LIBS_GPU:%=-l%)
+
+# Reminder:
+# $< = first prerequisite
+# $@ = the target which matched the rule
+# $^ = all prerequisites
+
+.PHONY: all cpu gpu clean
+# Default goal: build both the CPU-only and the GPU executable.
+all : cpu gpu
+# Convenience aliases for the two executables (phony, so a stray file named cpu/gpu is harmless).
+cpu: $(TARGET)CPU
+gpu: $(TARGET)GPU
+# CPU link step: $(LD) holds the -l flags derived from LIBS; LDFLAGS stays available for user overrides.
+$(TARGET)CPU: $(CPU_OBJ)
+	$(CC) $(CFLAGS) $^ -o $@ $(INC) $(LIB) $(LD) $(LDFLAGS)
+
+$(CPU_BUILD_DIR)/%.o $(CPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cpp | $(CPU_BUILD_DIR)
+	$(CC) $(CFLAGS) -c -o $@ $< $(INC)
+
+# For GPU, we need to build the NVCC objects, the NVCC linked object, and the
+# regular ones. Then, we link them all together.
+$(TARGET)GPU: $(GPU_BUILD_DIR)/link.o $(GPU_GCC_OBJ) | $(GPU_BUILD_DIR)
+	$(CC) -g -DCUDA $(CFLAGS) $(GPU_NVCC_OBJ) $^ -o $@ $(INC) $(LIB) $(LIB_GPU) $(LD) $(LD_GPU)
+
+$(GPU_BUILD_DIR)/link.o: $(GPU_NVCC_OBJ) | $(GPU_BUILD_DIR)
+	$(NVCC) --device-link $^ -o $@
+
+$(GPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cu.cpp | $(GPU_BUILD_DIR)
+	$(NVCC) $(NVCC_FLAGS) -DCUDA -x cu --device-c -o $@ $< $(INC)
+
+$(GPU_BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp | $(GPU_BUILD_DIR)
+	$(CC) $(CFLAGS) -g -DCUDA -c -o $@ $< $(INC)
+
+-include $(CPU_DEPS)
+-include $(GPU_DEPS)
+
+$(CPU_BUILD_DIR):
+	mkdir -p $@
+
+$(GPU_BUILD_DIR):
+	mkdir -p $@
+
+clean:
+	rm -Rf $(BUILD_DIR) $(TARGET)CPU $(TARGET)GPU
diff --git a/samples/1_CoreKernel/main.cu.cpp b/samples/1_CoreKernel/main.cu.cpp
new file mode 100644
index 0000000..e8e8519
--- /dev/null
+++ b/samples/1_CoreKernel/main.cu.cpp
@@ -0,0 +1,12 @@
+#define CUDATOOLS_IMPLEMENTATION
+#include <Core.h>
+
+DEFINE_KERNEL(add, int x, int y) { // Kernel: prints the sum of its two integer arguments.
+    printf("Kernel: %i\n", x + y);
+}
+// Launches `add` and waits for it: device calls are asynchronous, so main could otherwise return first.
+int main() {
+    KERNEL(add, CudaTools::Kernel::basic(1), 1, 1).wait(); // Prints 2.
+    return 0;
+}
+
diff --git a/samples/2_CoreClass/Makefile b/samples/2_CoreClass/Makefile
new file mode 100644
index 0000000..f0c9f94
--- /dev/null
+++ b/samples/2_CoreClass/Makefile
@@ -0,0 +1,95 @@
+CC := g++-10
+NVCC := nvcc
+CFLAGS := -Wall -std=c++17 -fopenmp -MMD
+NVCC_FLAGS := -MMD -w -Xcompiler
+# NOTE(review): NVCC_FLAGS ends with -Xcompiler, which takes the next argument (-DCUDA in the .cu.o rule) - confirm.
+INCLUDE := ../../
+LIBS_DIR :=
+LIBS_DIR_GPU := /usr/local/cuda/lib64
+LIBS :=
+LIBS_GPU := cuda cudart cublas
+# Name stem of the executables ($(TARGET)CPU and $(TARGET)GPU), source root, and build output root.
+TARGET = coreClass
+SRC_DIR = .
+BUILD_DIR = build
+
+# Should not need to modify below.
+
+CPU_BUILD_DIR = $(BUILD_DIR)/cpu
+GPU_BUILD_DIR = $(BUILD_DIR)/gpu
+
+SRC = $(wildcard $(SRC_DIR)/*/*.cpp) $(wildcard $(SRC_DIR)/*.cpp)
+
+# Get source files and object files.
+GCC_SRC = $(filter-out %.cu.cpp ,$(SRC))
+NVCC_SRC = $(filter %.cu.cpp, $(SRC))
+GCC_OBJ = $(GCC_SRC:$(SRC_DIR)/%.cpp=%.o)
+NVCC_OBJ = $(NVCC_SRC:$(SRC_DIR)/%.cpp=%.o)
+
+# If compiling for CPU, all go to GCC. Otherwise, they are split.
+CPU_OBJ = $(addprefix $(CPU_BUILD_DIR)/,$(GCC_OBJ)) $(addprefix $(CPU_BUILD_DIR)/,$(NVCC_OBJ))
+GPU_GCC_OBJ = $(addprefix $(GPU_BUILD_DIR)/,$(GCC_OBJ))
+GPU_NVCC_OBJ = $(addprefix $(GPU_BUILD_DIR)/,$(NVCC_OBJ))
+
+# $(info $$GCC_SRC is [${GCC_SRC}])
+# $(info $$NVCC_SRC is [${NVCC_SRC}])
+# $(info $$GCC_OBJ is [${GCC_OBJ}])
+# $(info $$NVCC_OBJ is [${NVCC_OBJ}])
+
+# $(info $$CPU_OBJ is [${CPU_OBJ}])
+# $(info $$GPU_GCC_OBJ is [${GPU_GCC_OBJ}])
+# $(info $$GPU_NVCC_OBJ is [${GPU_NVCC_OBJ}])
+
+HEADER = $(wildcard $(SRC_DIR)/*/*.h) $(wildcard $(SRC_DIR)/*.h)
+CPU_DEPS = $(wildcard $(CPU_BUILD_DIR)/*.d)
+GPU_DEPS = $(wildcard $(GPU_BUILD_DIR)/*.d)
+
+INC := $(INCLUDE:%=-I%)
+LIB := $(LIBS_DIR:%=-L%)
+LIB_GPU := $(LIBS_DIR_GPU:%=-L%)
+LD := $(LIBS:%=-l%)
+LD_GPU := $(LIBS_GPU:%=-l%)
+
+# Reminder:
+# $< = first prerequisite
+# $@ = the target which matched the rule
+# $^ = all prerequisites
+
+.PHONY: all cpu gpu clean
+# Default goal: build both the CPU-only and the GPU executable.
+all : cpu gpu
+# Convenience aliases for the two executables (phony, so a stray file named cpu/gpu is harmless).
+cpu: $(TARGET)CPU
+gpu: $(TARGET)GPU
+# CPU link step: $(LD) holds the -l flags derived from LIBS; LDFLAGS stays available for user overrides.
+$(TARGET)CPU: $(CPU_OBJ)
+	$(CC) $(CFLAGS) $^ -o $@ $(INC) $(LIB) $(LD) $(LDFLAGS)
+
+$(CPU_BUILD_DIR)/%.o $(CPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cpp | $(CPU_BUILD_DIR)
+	$(CC) $(CFLAGS) -c -o $@ $< $(INC)
+
+# For GPU, we need to build the NVCC objects, the NVCC linked object, and the
+# regular ones. Then, we link them all together.
+$(TARGET)GPU: $(GPU_BUILD_DIR)/link.o $(GPU_GCC_OBJ) | $(GPU_BUILD_DIR)
+	$(CC) -g -DCUDA $(CFLAGS) $(GPU_NVCC_OBJ) $^ -o $@ $(INC) $(LIB) $(LIB_GPU) $(LD) $(LD_GPU)
+
+$(GPU_BUILD_DIR)/link.o: $(GPU_NVCC_OBJ) | $(GPU_BUILD_DIR)
+	$(NVCC) --device-link $^ -o $@
+
+$(GPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cu.cpp | $(GPU_BUILD_DIR)
+	$(NVCC) $(NVCC_FLAGS) -DCUDA -x cu --device-c -o $@ $< $(INC)
+
+$(GPU_BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp | $(GPU_BUILD_DIR)
+	$(CC) $(CFLAGS) -g -DCUDA -c -o $@ $< $(INC)
+
+-include $(CPU_DEPS)
+-include $(GPU_DEPS)
+
+$(CPU_BUILD_DIR):
+	mkdir -p $@
+
+$(GPU_BUILD_DIR):
+	mkdir -p $@
+
+clean:
+	rm -Rf $(BUILD_DIR) $(TARGET)CPU $(TARGET)GPU
diff --git a/samples/2_CoreClass/main.cu.cpp b/samples/2_CoreClass/main.cu.cpp
new file mode 100644
index 0000000..99e588c
--- /dev/null
+++ b/samples/2_CoreClass/main.cu.cpp
@@ -0,0 +1,34 @@
+#define CUDATOOLS_IMPLEMENTATION
+#include <Core.h>
+
+class intPair {
+    DEVICE_CLASS(intPair)
+    public:
+        int x, y;
+        // Stores the two values on the host, then mirrors the object onto the device.
+        intPair(const int x_, const int y_) : x(x_), y(y_) {
+            allocateDevice(); // Reserve device memory for this intPair.
+            updateDevice().wait(); // Push the host state to the device and block until the copy is done.
+        }
+        // Exchanges x and y. HD makes this member usable on the device as well as on the host.
+        HD void swap() {
+            const int previousX = x;
+            x = y;
+            y = previousX;
+        }
+};
+
+DEFINE_KERNEL(swap, intPair* const pair) { pair->swap(); } // Swaps the pair's members in place.
+// Swaps a pair on the device and copies the result back; x and y are ints, hence %i (not %u).
+int main() {
+    intPair pair(1, 2);
+    printf("Before: %i, %i\n", pair.x, pair.y); // Prints 1, 2.
+    // that() is the pointer to the device copy; the host pointer must not be handed to a kernel.
+    KERNEL(swap, CudaTools::Kernel::basic(1), pair.that()).wait();
+    pair.updateHost().wait(); // Copies the memory from the device back to the host and waits until finished.
+
+    printf("After: %i, %i\n", pair.x, pair.y); // Prints 2, 1.
+    return 0;
+}
+
+
diff --git a/tests.cu.cpp b/tests.cu.cpp
new file mode 100644
index 0000000..d3e179c
--- /dev/null
+++ b/tests.cu.cpp
@@ -0,0 +1,494 @@
+#define CUDATOOLS_IMPLEMENTATION
+#define CUDATOOLS_ARRAY_MAX_AXES 8
+#include "Array.h"
+#include "BLAS.h"
+#include "Core.h"
+
+#include <Eigen/Core>
+#include <chrono>
+#include <complex>
+
+namespace CT = CudaTools;
+
+/////////////
+// Helpers //
+/////////////
+
+#define TIME_START(name) auto begin_##name = std::chrono::steady_clock::now() // Starts a timer.
+// Stops timer `name` and prints the elapsed time in ms, or in µs when it is below one millisecond.
+#define TIME_END(name)                                                                             \
+    auto end_##name = std::chrono::steady_clock::now();                                            \
+    auto time_ms_##name =                                                                          \
+        std::chrono::duration_cast<std::chrono::milliseconds>(end_##name - begin_##name).count();  \
+    auto time_mus_##name =                                                                         \
+        std::chrono::duration_cast<std::chrono::microseconds>(end_##name - begin_##name).count();  \
+    if (time_ms_##name == 0) {                                                                     \
+        printf("[%s] Time Elapsed: %lld[µs]\n", #name, static_cast<long long>(time_mus_##name));   \
+    } else {                                                                                       \
+        printf("[%s] Time Elapsed: %lld[ms]\n", #name, static_cast<long long>(time_ms_##name));    \
+    }
+// Times `call`: expands to TIME_START(name), the call itself, then TIME_END(name).
+#define TIME(call, name)                                                                           \
+    TIME_START(name);                                                                              \
+    call;                                                                                          \
+    TIME_END(name);
+
+#define TEST(predicate, name, msg)                                                                 \
+    failed += (predicate) ? 0 : 1;                                                                 \
+    printf("[%s] ", (predicate) ? "\033[1;32mPASS\033[0m" : "\033[1;31mFAIL\033[0m");              \
+    printf("%s | %s.\n", name, msg);
+
+template <typename T> struct Type; // Maps a type to its printable name; only specializations exist.
+// Specializes Type<X> so that Type<X>::name holds the spelling of X exactly as written here.
+#define REGISTER_PARSE_TYPE(X)                                                                     \
+    template <> struct Type<X> { static const std::string name; };                                 \
+    const std::string Type<X>::name = #X
+// Types whose names can be printed through type<T>().
+REGISTER_PARSE_TYPE(uint8_t);
+REGISTER_PARSE_TYPE(int16_t);
+REGISTER_PARSE_TYPE(int32_t);
+REGISTER_PARSE_TYPE(float);
+REGISTER_PARSE_TYPE(double);
+
+std::string box(std::string str) { // Frames `str` between two rows of '#' sized to the text.
+    const std::string border(str.size() + 6, '#');
+    return border + "\n## " + str + " ##\n" + border;
+}
+
+std::string box2(std::string str) { // Border is size() - 5: the summary text carries ANSI codes.
+    const std::string tops(str.size() > 5 ? str.size() - 5 : 0, '-'); // Clamped: no size_t wrap.
+    return tops + "\n|| " + str + " ||\n" + tops;
+}
+
+std::string boxSmall(std::string str) { // One-line header: six dashes, "[ str ]", six dashes.
+    const std::string dashes(6, '-');
+    return dashes + "[ " + str + " ]" + dashes;
+}
+
+std::string separator() { // A rule of 40 '=' characters with a newline on each side.
+    const std::string rule(40, '=');
+    return "\n" + rule + "\n";
+}
+
+template <typename T> std::string type() { return "\033[1;96m" + Type<T>::name + "\033[0m"; }
+
+CT::Shape makeRandom2DShape() { // Both dimensions are drawn uniformly from [1, 15].
+    std::random_device seedSource;
+    std::mt19937 engine(seedSource());
+    std::uniform_int_distribution<uint32_t> dimension(1, 15);
+    return CT::Shape({dimension(engine), dimension(engine)});
+}
+
+///////////
+// Tests //
+///////////
+
+class TestClass {
+    DEVICE_CLASS(TestClass);
+    // Minimal device class for MacroTests::Class: a single int mirrored onto the device.
+  public:
+    int x;
+    explicit TestClass(const int x) : x(x) { // explicit: no silent int -> TestClass conversion.
+        allocateDevice();
+        updateDevice().wait();
+    }
+};
+
+DEFINE_KERNEL(times, const CT::Array<int> arr) { // Multiplies each visited element of arr by 2.
+    BASIC_LOOP(arr.shape().length()) { arr[iThread] *= 2; }
+}
+// Kernel used by MacroTests::Class: sets x to 100 on the object it is given.
+DEFINE_KERNEL(classTest, TestClass* const test) { test->x = 100; }
+
+struct MacroTests {
+    static uint32_t Kernel() { // Launches `times` on an array of ones and expects all twos back.
+        uint32_t failed = 0;
+        CT::Array<int> A = CT::Array<int>::constant({10}, 1);
+        A.updateDevice().wait();
+        KERNEL(times, CT::Kernel::basic(A.shape().items()), A.view()).wait();
+        A.updateHost().wait();
+        // Every element started at 1, so any value other than 2 counts as an error.
+        uint32_t errors = 0;
+        for (auto it = A.begin(); it != A.end(); ++it) {
+            if (*it != 2) ++errors;
+        }
+
+        std::ostringstream msg;
+        msg << "Errors: " << errors;
+        TEST(errors == 0, "Kernel", msg.str().c_str());
+        return failed;
+    };
+    // Runs `classTest` on the device copy and expects x == 100 on the host after updateHost().
+    static uint32_t Class() {
+        uint32_t failed = 0;
+        TestClass test(1);
+        KERNEL(classTest, CT::Kernel::basic(1), test.that()).wait();
+        test.updateHost().wait();
+        // Report the real outcome rather than an unconditional "Errors: 0".
+        TEST(test.x == 100, "Class", (test.x == 100) ? "Errors: 0" : "Errors: 1");
+        return failed;
+    }
+};
+
+template <typename T> struct ArrayTests {
+    static uint32_t Indexing() { // Chained operator[] must agree with grouped {...} indexing.
+        uint32_t failed = 0;
+        CT::Array<T> A = CT::Array<T>::range(0, 240);
+        A.reshape({5, 3, 1, 4, 2, 1, 1, 2});
+        // Element access over all 8 axes; the size-1 axes are always indexed with 0.
+        uint32_t mismatches = 0;
+        for (uint32_t i = 0; i < 5; ++i) {
+            for (uint32_t j = 0; j < 3; ++j) {
+                for (uint32_t k = 0; k < 4; ++k) {
+                    for (uint32_t l = 0; l < 2; ++l) {
+                        for (uint32_t m = 0; m < 2; ++m) {
+                            const T chained = static_cast<T>(A[i][j][0][k][l][0][0][m]);
+                            const T grouped = static_cast<T>(A[{i, j, 0, k, l, 0, 0, m}]);
+                            if (chained != grouped) ++mismatches;
+                        }
+                    }
+                }
+            }
+        }
+
+        std::ostringstream report;
+        report << "Errors: " << mismatches;
+        TEST(mismatches == 0, "Element", report.str().c_str());
+        // Sub-arrays obtained by fixing the first two axes, grouped versus one at a time.
+        mismatches = 0;
+        CT::Array<T> ApartGroup_1 = A[{2, 2}];
+        CT::Array<T> ApartIndiv_1 = A[2][2];
+        for (uint32_t k = 0; k < 4; ++k) {
+            for (uint32_t l = 0; l < 2; ++l) {
+                for (uint32_t m = 0; m < 2; ++m) {
+                    const T chained = static_cast<T>(ApartIndiv_1[0][k][l][0][0][m]);
+                    const T grouped = static_cast<T>(ApartGroup_1[{0, k, l, 0, 0, m}]);
+                    if (chained != grouped) ++mismatches;
+                }
+            }
+        }
+
+        report.str("");
+        report << "Errors: " << mismatches;
+        TEST(mismatches == 0, "Axis (1/2)", report.str().c_str());
+        // Same comparison after fixing the first four axes.
+        mismatches = 0;
+        CT::Array<T> ApartGroup_2 = A[{3, 2, 0, 3}];
+        CT::Array<T> ApartIndiv_2 = A[3][2][0][3];
+
+        for (uint32_t l = 0; l < 2; ++l) {
+            for (uint32_t m = 0; m < 2; ++m) {
+                const T chained = static_cast<T>(ApartIndiv_2[l][0][0][m]);
+                const T grouped = static_cast<T>(ApartGroup_2[{l, 0, 0, m}]);
+                if (chained != grouped) ++mismatches;
+            }
+        }
+
+        report.str("");
+        report << "Errors: " << mismatches;
+        TEST(mismatches == 0, "Axis (2/2)", report.str().c_str());
+        return failed;
+    };
+    // Writes through slices of A, then reads A directly to check where the values landed.
+    static uint32_t Slicing() {
+        uint32_t failed = 0;
+        CT::Array<T> A = CT::Array<T>::constant({4, 5, 5}, 0);
+        // Fill a 3x3x3 block with 1, 2, 3, ... in iteration order.
+        CT::Array<T> Aslice = A.slice({{0, 3}, {1, 4}, {1, 4}});
+        T num = static_cast<T>(1);
+        for (auto it = Aslice.begin(); it != Aslice.end(); ++it) {
+            *it = num;
+            ++num;
+        }
+        // Fill the first column of A[3] with -1, -2, ... (these wrap around for unsigned T).
+        CT::Array<T> Aslice2 = A[3].slice({{0, 5}, {0, 1}});
+        num = static_cast<T>(-1);
+        for (auto it = Aslice2.begin(); it != Aslice2.end(); ++it) {
+            *it = num;
+            --num;
+        }
+
+        uint32_t mismatches = 0;
+        for (int i = 0; i < 3; ++i) {
+            for (int j = 0; j < 3; ++j) {
+                for (int k = 0; k < 3; ++k) {
+                    const T actual = static_cast<T>(A[i][1 + j][1 + k]);
+                    const T expected = static_cast<T>(9 * i + 3 * j + k + 1);
+                    if (actual != expected) ++mismatches;
+                }
+            }
+        }
+        std::ostringstream report;
+        report << "Errors: " << mismatches;
+        TEST(mismatches == 0, "Block", report.str().c_str());
+
+        mismatches = 0;
+        for (int i = 0; i < 5; ++i) {
+            const T actual = static_cast<T>(A[3][i][0]);
+            const T expected = static_cast<T>(-(i + 1));
+            if (actual != expected) ++mismatches;
+        }
+
+        report.str("");
+        report << "Errors: " << mismatches;
+        TEST(mismatches == 0, "Column", report.str().c_str());
+        return failed;
+    }
+};
+
+template <typename T> struct BLASTests {
+    static double thres; // Largest residual norm that still counts as a pass (set per type).
+    static uint32_t GEMV(int attempts) { // `attempts` random matrix-vector products vs. Eigen.
+        uint32_t failed = 0;
+        for (int i = 0; i < attempts; i++) {
+            CT::Shape Ashape = makeRandom2DShape();
+            CT::Shape xshape = CT::Shape({Ashape.cols(), 1});
+            CT::Shape yshape = CT::Shape({Ashape.rows(), 1});
+
+            CT::Array<T> A(Ashape);
+            CT::Array<T> x(xshape);
+            CT::Array<T> y(yshape);
+
+            A.setRandom(-100, 100);
+            x.setRandom(-100, 100);
+
+            A.updateDevice();
+            x.updateDevice().wait();
+
+            CT::BLAS::GEMV<T>(1.0, A, x, 0.0, y).wait();
+            y.updateHost().wait();
+
+            CT::Array<T> yTest(yshape, true);
+            yTest.eigenMap() = A.eigenMap() * x.eigenMap();
+            // Compare the device result with the Eigen reference (y - y would always be zero).
+            double norm = (yTest.eigenMap() - y.eigenMap()).norm();
+
+            std::ostringstream name;
+            name << "GEMV (" << i + 1 << "/" << attempts << ")";
+            std::ostringstream msg;
+            msg << "Matrix Shape: " << Ashape << ", "
+                << "Residual: " << norm;
+            TEST(norm < thres, name.str().c_str(), msg.str().c_str());
+        }
+        return failed;
+    };
+    // GEMV over a 2x3 batch of matrices and vectors; each product is checked against Eigen.
+    static uint32_t GEMVBroadcast() {
+        uint32_t failed = 0;
+        CT::Shape Ashape = makeRandom2DShape();
+        CT::Shape xshape = CT::Shape({Ashape.cols(), 1});
+        CT::Shape yshape = CT::Shape({Ashape.rows(), 1});
+
+        CT::Array<T> A({2, 3, Ashape.rows(), Ashape.cols()});
+        CT::Array<T> x({2, 3, xshape.rows(), xshape.cols()});
+        CT::Array<T> y({2, 3, yshape.rows(), yshape.cols()});
+
+        A.setRandom(-100, 100);
+        x.setRandom(-100, 100);
+
+        A.updateDevice();
+        x.updateDevice().wait();
+
+        CT::BLAS::GEMV<T>(1.0, A, x, 0.0, y).wait();
+        y.updateHost().wait();
+
+        double norm = 0;
+        CT::Array<T> yTest(yshape, true);
+        for (int i = 0; i < 2; ++i) {
+            for (int j = 0; j < 3; ++j) {
+                yTest.eigenMap() = A[i][j].eigenMap() * x[i][j].eigenMap();
+                norm += (yTest.eigenMap() - y[i][j].eigenMap()).norm();
+            }
+        }
+
+        std::ostringstream msg;
+        msg << "Matrix Shape: " << Ashape << ", "
+            << "Residual: " << norm;
+        TEST(norm < thres, "GEMV Broadcast", msg.str().c_str());
+        return failed;
+    };
+    // `attempts` random matrix-matrix products, each compared against Eigen.
+    static uint32_t GEMM(int attempts) {
+        uint32_t failed = 0;
+        for (int i = 0; i < attempts; i++) {
+            CT::Shape Ashape = makeRandom2DShape();
+            CT::Shape Bshape = makeRandom2DShape();
+            Bshape = CT::Shape({Ashape.cols(), Bshape.cols()});
+
+            CT::Shape Cshape = CT::Shape({Ashape.rows(), Bshape.cols()});
+
+            CT::Array<T> A(Ashape);
+            CT::Array<T> B(Bshape);
+            CT::Array<T> C(Cshape);
+
+            A.setRandom(-100, 100);
+            B.setRandom(-100, 100);
+            C.setRandom(-100, 100);
+
+            A.updateDevice();
+            B.updateDevice();
+            C.updateDevice().wait();
+
+            CT::BLAS::GEMM<T>(1.0, A, B, 0.0, C).wait();
+            C.updateHost().wait();
+
+            CT::Array<T> CTest(Cshape, true);
+            CTest.eigenMap() = A.eigenMap() * B.eigenMap();
+
+            double norm = (CTest.eigenMap() - C.eigenMap()).norm();
+
+            std::ostringstream name;
+            name << "GEMM (" << i + 1 << "/" << attempts << ")";
+            std::ostringstream msg;
+            msg << "Matrix Shapes: " << Ashape << Bshape << ", "
+                << "Residual: " << norm;
+            TEST(norm < thres, name.str().c_str(), msg.str().c_str());
+        }
+        return failed;
+    };
+    // GEMM over a 2x3 batch of matrix pairs; each product is checked against Eigen.
+    static uint32_t GEMMBroadcast() {
+        uint32_t failed = 0;
+        CT::Shape Ashape = makeRandom2DShape();
+        CT::Shape Bshape = makeRandom2DShape();
+        Bshape = CT::Shape({Ashape.cols(), Bshape.cols()});
+
+        CT::Shape Cshape = CT::Shape({Ashape.rows(), Bshape.cols()});
+
+        CT::Array<T> A({2, 3, Ashape.rows(), Ashape.cols()});
+        CT::Array<T> B({2, 3, Bshape.rows(), Bshape.cols()});
+        CT::Array<T> C({2, 3, Cshape.rows(), Cshape.cols()});
+
+        A.setRandom(-100, 100);
+        B.setRandom(-100, 100);
+
+        A.updateDevice();
+        B.updateDevice();
+        C.updateDevice().wait();
+
+        CT::BLAS::GEMM<T>(1.0, A, B, 0.0, C).wait();
+        C.updateHost().wait();
+
+        double norm = 0;
+        CT::Array<T> CTest(Cshape, true);
+        for (int i = 0; i < 2; ++i) {
+            for (int j = 0; j < 3; ++j) {
+                CTest.eigenMap() = A[i][j].eigenMap() * B[i][j].eigenMap();
+                norm += (CTest.eigenMap() - C[i][j].eigenMap()).norm();
+            }
+        }
+
+        std::ostringstream msg;
+        msg << "Matrix Shapes: " << Ashape << Bshape << ", "
+            << "Residual: " << norm;
+        TEST(norm < thres, "GEMM Broadcast", msg.str().c_str());
+        return failed;
+    };
+    // Batched LU factorization and solve; checks the residual A x - b over the 2x3 batch.
+    static uint32_t PLU() {
+        uint32_t failed = 0;
+        CT::Shape Ashape = makeRandom2DShape();
+        CT::Shape xshape = makeRandom2DShape();
+        Ashape = CT::Shape({Ashape.rows(), Ashape.rows()});
+        xshape = CT::Shape({Ashape.rows(), xshape.cols()});
+
+        CT::Array<T> A({2, 3, Ashape.rows(), Ashape.rows()});
+        CT::Array<T> x({2, 3, xshape.rows(), xshape.cols()});
+        CT::Array<T> b({2, 3, xshape.rows(), xshape.cols()});
+        CT::Array<T> Ax({2, 3, xshape.rows(), xshape.cols()});
+
+        A.setRandom(-100, 100);
+        b.setRandom(-100, 100);
+
+        CT::Array<T> LU(A.copy());
+        x = b;
+
+        A.updateDevice();
+        LU.updateDevice();
+        x.updateDevice().wait();
+
+        CT::BLAS::PLUBatch<T> luBatch(LU);
+        CT::BLAS::Batch<T> xBatch(x);
+        luBatch.computeLU().wait();
+        luBatch.solve(xBatch).wait();
+
+        // Compute Ax and compare difference.
+        CT::BLAS::GEMM<T>(1.0, A, x, 0.0, Ax).wait();
+        Ax.updateHost().wait(); // Wait for the copy: Ax is read on the host right below.
+
+        double norm = 0;
+        for (int i = 0; i < 2; ++i) {
+            for (int j = 0; j < 3; ++j) {
+                norm += (Ax[i][j].eigenMap() - b[i][j].eigenMap()).norm();
+            }
+        }
+
+        std::ostringstream msg;
+        msg << "Matrix Shape: " << Ashape << xshape << ", "
+            << "Residual: " << norm;
+        TEST(norm < thres, "PLU/Solve", msg.str().c_str());
+        return failed;
+    }
+};
+
+template <> double BLASTests<float>::thres = 1.0;   // Same value as the literal 10e-1.
+template <> double BLASTests<double>::thres = 1e-7; // Same value as the literal 10e-8.
+
+uint32_t doMacroTests() {
+    // Kernel launch test first, then the device-class test; returns the number of failed checks.
+    uint32_t failures = MacroTests::Kernel();
+    failures += MacroTests::Class();
+    std::cout << "\n";
+    return failures;
+}
+
+template <typename T> uint32_t doArrayTests() {
+    // Runs the indexing and slicing suites for element type T; returns the failed-check count.
+    std::cout << boxSmall("Index Tests : " + type<T>()) << "\n";
+    uint32_t failures = ArrayTests<T>::Indexing();
+    std::cout << "\n" << boxSmall("Slice Tests : " + type<T>()) << "\n";
+    failures += ArrayTests<T>::Slicing();
+    std::cout << "\n";
+    return failures;
+}
+
+template <typename T> uint32_t doBLASTests() {
+    // Runs the GEMV, GEMM and PLU suites for scalar type T; returns the failed-check count.
+    std::cout << boxSmall("GEMV Tests : " + type<T>()) << "\n";
+    uint32_t failures = BLASTests<T>::GEMV(5);
+    failures += BLASTests<T>::GEMVBroadcast();
+
+    std::cout << "\n" << boxSmall("GEMM Tests : " + type<T>()) << "\n";
+    failures += BLASTests<T>::GEMM(5);
+    failures += BLASTests<T>::GEMMBroadcast();
+
+    std::cout << "\n" << boxSmall("PLU Tests : " + type<T>()) << "\n";
+    failures += BLASTests<T>::PLU();
+    std::cout << "\n";
+    return failures;
+}
+
+int main() { // Runs every suite, prints a summary box, and exits non-zero if any check failed.
+    uint32_t failed = 0;
+    std::cout << box("Macro Tests") << "\n";
+    failed += doMacroTests();
+
+    std::cout << box("Array Tests") << "\n";
+    // Test different sizes.
+    failed += doArrayTests<uint8_t>();
+    failed += doArrayTests<int16_t>();
+    failed += doArrayTests<int32_t>();
+    failed += doArrayTests<double>();
+
+    std::cout << box("BLAS Tests") << "\n";
+    failed += doBLASTests<float>();
+    failed += doBLASTests<double>();
+    // Expected checks: 2 macro + 5 per array type (4 types) + 13 per BLAS type (2 types).
+    constexpr uint32_t tests = 2 + 4 * 5 + 13 * 2;
+    std::ostringstream msg;
+    msg << ((failed == 0) ? "\033[1;32mPASS \033[0m(" : "\033[1;31mFAIL \033[0m(")
+        << (tests - failed) << "/" << tests << ")";
+    std::cout << box2(msg.str()) << "\n";
+    // Report failure through the exit status so scripts and CI can detect it.
+    return (failed == 0) ? 0 : 1;
+}