A library and framework for developing CPU-CUDA compatible applications under one unified code.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

34 lines
1.4 KiB

#define CUDATOOLS_IMPLEMENTATION
#include <Core.h>
#include <Array.h>
DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) {
BASIC_LOOP(arr.shape().items()) {
arr[iThread] *= 2;
}
}
int main() {
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1);
CudaTools::Array<double> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10);
CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
arrComma << 1, 2, 3, 4; // Comma initializer if needed.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
// Call the kernel multiple times asynchronously. Note: since they share same
// stream, they are not run in parallel, just queued on the device.
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange);
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange);
KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait();
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait();
arrRange.updateHost();
arrConst.updateHost();
arrLinspace.updateHost();
arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream.
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
return 0;
}