This repository serve as a backup for my Maxwell-TD code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

78 lines
2.1 KiB

#pragma once
#include <mutex>
#include <cassert>
#include <atomic>
#include <algorithm>
#include <chrono>
#include <cmath>
#include <thread>
#include <vector>
#include <string>
#include <iostream>
#include <numeric>
#include <tuple>
#include "cuda.h"
#define cuda_err_chk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=false) {
if(code != cudaSuccess) {
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(1);
}
}
#define PRINT_ERROR \
do { \
fprintf(stderr, "Error at line %d, file %s (%d) [%s]\n", \
__LINE__, __FILE__, errno, strerror(errno)); exit(1); \
} while(0)
static std::chrono::time_point<std::chrono::high_resolution_clock> now() {
return std::chrono::high_resolution_clock::now();
}
/*Device function that returns how many SMs are there in the device/arch - it can be more than the maximum readable SMs*/
__device__ __forceinline__ unsigned int getnsmid(){
unsigned int r;
asm("mov.u32 %0, %%nsmid;" : "=r"(r));
return r;
}
__device__ __forceinline__ unsigned int my_lanemask32_lt()
{
unsigned int lanemask32_lt;
asm volatile("mov.u32 %0, %%lanemask_lt;" : "=r"(lanemask32_lt));
return (lanemask32_lt);
}
/*Device function that returns the current SMID of for the block being run*/
__device__ __forceinline__ unsigned int getsmid(){
unsigned int r;
asm("mov.u32 %0, %%smid;" : "=r"(r));
return r;
}
/*Device function that returns the current warpid of for the block being run*/
__device__ __forceinline__ unsigned int getwarpid(){
unsigned int r;
asm("mov.u32 %0, %%warpid;" : "=r"(r));
return r;
}
__device__ __forceinline__ unsigned int getwarpsz()
{
unsigned int warpSize;
asm volatile("mov.u32 %0, WARP_SZ;" : "=r"(warpSize));
return warpSize;
}
/*Device function that returns the current laneid of for the warp in the block being run*/
__device__ __forceinline__ unsigned int getlaneid(){
unsigned int r;
asm("mov.u32 %0, %%laneid;" : "=r"(r));
return r;
}