You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
35 lines
1.4 KiB
35 lines
1.4 KiB
2 years ago
|
#define CUDATOOLS_IMPLEMENTATION
|
||
|
#include <Core.h>
|
||
|
#include <Array.h>
|
||
|
|
||
|
// Kernel `times2`: multiplies the elements of `arr` by two, in place.
// The element count is taken from the array's own shape. BASIC_LOOP provides
// the index `iThread` used below; its expansion (and therefore the exact
// thread-to-element mapping and bounds handling) is defined by CudaTools and
// is not visible in this file.
DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) {
    const auto itemCount = arr.shape().items();
    BASIC_LOOP(itemCount) {
        arr[iThread] *= 2;
    }
}
|
||
|
|
||
|
// Sample driver: builds four arrays with different CudaTools::Array factories /
// initializers, prints them, launches the `times2` kernel once per array,
// copies every array back to the host and prints the results.
//
// Returns 0 on completion.
int main() {
    CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
    CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1);
    // Declared as Array<int> so the variable's type matches the
    // Array<int>::linspace factory that builds it and the Array<int>
    // parameter expected by the times2 kernel.
    CudaTools::Array<int> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10);
    CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
    arrComma << 1, 2, 3, 4;                 // Comma initializer if needed.

    std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";

    // Call the kernel multiple times asynchronously, once per array. Each launch
    // is sized from, and operates on, the same array. Note: since the launches
    // share the same stream, they are not run in parallel, just queued on the
    // device, so only the last launch needs to be waited on.
    KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange);
    KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst);
    KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace);
    KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma).wait();

    arrRange.updateHost();
    arrConst.updateHost();
    arrLinspace.updateHost();
    arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream.

    std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
    return 0;
}
|
||
|
|
||
|
|