parent
2a920ca7ba
commit
00a27b66c3
8 changed files with 393 additions and 115 deletions
@ -0,0 +1,125 @@ |
||||
#ifndef CUDATOOLS_COMPLEX_H |
||||
#define CUDATOOLS_COMPLEX_H |
||||
|
||||
#include "Macros.h" |
||||
#include <cmath> |
||||
#include <complex> |
||||
|
||||
/**
|
||||
 * This is directly adapted from cuComplex.h, except placed into a C++-friendly format. |
||||
*/ |
||||
|
||||
namespace CudaTools { |
||||
|
||||
template <typename T> class complex { |
||||
private: |
||||
T r = 0; |
||||
T i = 0; |
||||
|
||||
public: |
||||
HD complex() = default; |
||||
HD complex(T real, T imag) : r(real), i(imag){}; |
||||
HD complex(T x) : r(x), i(0){}; |
||||
|
||||
HD complex<T> operator+(const complex<T> z) const { return complex(r + z.r, i + z.i); }; |
||||
HD complex<T> operator-(const complex<T> z) const { return complex(r - z.r, i - z.i); }; |
||||
HD complex<T> operator*(const T y) const { return complex(r * y, i * y); }; |
||||
HD complex<T> operator/(const T y) const { return complex(r / y, i / y); }; |
||||
|
||||
HD complex<T> operator*(const complex<T> z) const { |
||||
return complex(r * z.r - i * z.i, r * z.i + i * z.r); |
||||
}; |
||||
HD complex<T> operator/(const complex<T> z) const { |
||||
T s = std::abs(z.r) + std::abs(z.i); |
||||
T oos = 1.0f / s; |
||||
T ars = r * oos, ais = i * oos, brs = z.r * oos, bis = z.i * oos; |
||||
s = (brs * brs) + (bis * bis); |
||||
oos = 1.0f / s; |
||||
return complex(ars * brs + ais * bis, ais * brs - ars * bis) * oos; |
||||
}; |
||||
|
||||
HD void operator+=(const complex<T> z) { |
||||
r += z.r; |
||||
i += z.i; |
||||
}; |
||||
HD void operator-=(const complex<T> z) { |
||||
r -= z.r; |
||||
i -= z.i; |
||||
}; |
||||
HD void operator*=(const T y) { |
||||
r *= y; |
||||
i *= y; |
||||
}; |
||||
HD void operator/=(const T y) { |
||||
r /= y; |
||||
i /= y; |
||||
}; |
||||
|
||||
HD void operator*=(const complex<T> z) { |
||||
T a = r * z.r - i * z.i, b = r * z.i + i * z.r; |
||||
r = a; |
||||
i = b; |
||||
} |
||||
|
||||
HD void operator/=(const complex<T> z) { |
||||
T s = std::abs(z.r) + std::abs(z.i); |
||||
T oos = 1.0f / s; |
||||
T ars = r * oos, ais = i * oos, brs = z.r * oos, bis = z.i * oos; |
||||
s = (brs * brs) + (bis * bis); |
||||
oos = 1.0f / s; |
||||
r = (ars * brs + ais * bis) * oos; |
||||
i = (ais * brs - ars * bis) * oos; |
||||
}; |
||||
|
||||
HD T abs() const { |
||||
T a = std::abs(r), b = std::abs(i); |
||||
T v, w; |
||||
if (a > b) { |
||||
v = a; |
||||
w = b; |
||||
} else { |
||||
v = b; |
||||
w = a; |
||||
} |
||||
T t = w / v; |
||||
t = 1.0f + t * t; |
||||
t = v * std::sqrt(t); |
||||
if ((v == 0.0f) || (v > 3.402823466e38f) || (w > 3.402823466e38f)) { |
||||
t = v + w; |
||||
} |
||||
return t; |
||||
} |
||||
|
||||
HD complex<T> conj() const { return complex(r, -1 * i); } |
||||
|
||||
HD T real() const { return r; }; |
||||
HD T imag() const { return i; }; |
||||
}; |
||||
|
||||
// Explicit instantiation of complex for the two component types real32 and
// real64 (both names come from outside this header, presumably Macros.h).
// NOTE(review): an explicit instantiation definition in a header is repeated in
// every translation unit that includes it -- confirm this is intended.
template class complex<real32>; |
||||
template class complex<real64>; |
||||
|
||||
/**
 * Product of a real scalar and a complex number with the scalar on the left.
 * Forwards to the member complex<T>::operator*(const T) with operands swapped.
 *
 * @param y Real scalar factor.
 * @param z Complex factor.
 * @return z scaled by y.
 */
template <class T> complex<T> operator*(const T y, const complex<T> z) {
    const complex<T> product = z * y;
    return product;
}
||||
/**
 * Quotient of a real scalar by a complex number (scalar on the left).
 *
 * Division is not commutative, so the scalar is promoted to y + 0i and then
 * divided by z with complex<T>::operator/(const complex<T>).
 *
 * @param y Real numerator.
 * @param z Complex denominator.
 * @return y / z.
 */
template <class T> complex<T> operator/(const T y, const complex<T> z) {
    return complex<T>(y) / z;
}
||||
|
||||
// Explicit instantiations of the scalar-on-the-left operator* and operator/
// templates for real32 and real64.
template complex<real32> operator*<real32>(const real32, const complex<real32>); |
||||
template complex<real64> operator*<real64>(const real64, const complex<real64>); |
||||
template complex<real32> operator/<real32>(const real32, const complex<real32>); |
||||
template complex<real64> operator/<real64>(const real64, const complex<real64>); |
||||
|
||||
}; // namespace CudaTools
|
||||
|
||||
#ifdef CUDA |
||||
using complex64 = CudaTools::complex<real32>; |
||||
using complex128 = CudaTools::complex<real64>; |
||||
#else |
||||
using complex64 = std::complex<real32>; /**< Type alias for 64-bit complex floating point datatype.
|
||||
* This adapts depending on the CUDA compilation flag, and |
||||
* will automatically switch CudaTools::complex<real32>. */ |
||||
using complex128 = |
||||
std::complex<real64>; /**< Type alias for 128-bit complex floating point datatype. This adapts
|
||||
* depending on the CUDA compilation flag, and will automatically switch |
||||
* CudaTools::complex<real64>. */ |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,95 @@ |
||||
# Template Makefile that builds the same sources twice:
#   $(TARGET)CPU - every source compiled with $(CC), without -DCUDA.
#   $(TARGET)GPU - *.cu.cpp sources compiled with $(NVCC) and -DCUDA, the
#                  remaining sources with $(CC) and -DCUDA, then linked together.
# Replace every <<...>> placeholder before use.

CC := g++-10
NVCC := nvcc
CFLAGS := -Wall -std=c++17 -fopenmp -MMD
# NOTE(review): -Xcompiler expects an argument, so as the last flag here it
# consumes whatever follows $(NVCC_FLAGS) on the command line (currently
# -DCUDA in the *.cu.o rule). Confirm this is intended.
NVCC_FLAGS := -MMD -w -Xcompiler

INCLUDE := <<Put extra include directories here, separated by a space>>
LIBS_DIR := <<Put library directories here, separated by a space>>
LIBS_DIR_GPU := /usr/local/cuda/lib64 <<Put extra include GPU library directories here, separated by a space>>
LIBS := <<Put the names of the libraries here, separated by a space>>
LIBS_GPU := cuda cudart cublas <<Put extra GPU libraries here, separated by a space>>

TARGET = <<Put the name of your target here>>
SRC_DIR = .
BUILD_DIR = build

# Should not need to modify below.

CPU_BUILD_DIR = $(BUILD_DIR)/cpu
GPU_BUILD_DIR = $(BUILD_DIR)/gpu

# Sources are taken from SRC_DIR and from its immediate subdirectories.
SRC = $(wildcard $(SRC_DIR)/*/*.cpp) $(wildcard $(SRC_DIR)/*.cpp)

# Get source files and object files.
GCC_SRC = $(filter-out %.cu.cpp,$(SRC))
NVCC_SRC = $(filter %.cu.cpp,$(SRC))
GCC_OBJ = $(GCC_SRC:$(SRC_DIR)/%.cpp=%.o)
NVCC_OBJ = $(NVCC_SRC:$(SRC_DIR)/%.cpp=%.o)

# If compiling for CPU, all go to GCC. Otherwise, they are split.
CPU_OBJ = $(addprefix $(CPU_BUILD_DIR)/,$(GCC_OBJ)) $(addprefix $(CPU_BUILD_DIR)/,$(NVCC_OBJ))
GPU_GCC_OBJ = $(addprefix $(GPU_BUILD_DIR)/,$(GCC_OBJ))
GPU_NVCC_OBJ = $(addprefix $(GPU_BUILD_DIR)/,$(NVCC_OBJ))

# $(info $$GCC_SRC is [${GCC_SRC}])
# $(info $$NVCC_SRC is [${NVCC_SRC}])
# $(info $$GCC_OBJ is [${GCC_OBJ}])
# $(info $$NVCC_OBJ is [${NVCC_OBJ}])

# $(info $$CPU_OBJ is [${CPU_OBJ}])
# $(info $$GPU_GCC_OBJ is [${GPU_GCC_OBJ}])
# $(info $$GPU_NVCC_OBJ is [${GPU_NVCC_OBJ}])

HEADER = $(wildcard $(SRC_DIR)/*/*.h) $(wildcard $(SRC_DIR)/*.h)

# Dependency files written by -MMD sit next to each object, including objects
# in subdirectories of the build directory, so derive them from the object lists.
CPU_DEPS = $(CPU_OBJ:.o=.d)
GPU_DEPS = $(GPU_GCC_OBJ:.o=.d) $(GPU_NVCC_OBJ:.o=.d)

INC := $(INCLUDE:%=-I%)
LIB := $(LIBS_DIR:%=-L%)
LIB_GPU := $(LIBS_DIR_GPU:%=-L%)
LD := $(LIBS:%=-l%)
LD_GPU := $(LIBS_GPU:%=-l%)

# Reminder:
# $< = first prerequisite
# $@ = the target which matched the rule
# $^ = all prerequisites
# $(@D) = directory part of $@

.PHONY: all cpu gpu clean

all : cpu gpu

cpu: $(TARGET)CPU
gpu: $(TARGET)GPU

# Link the CPU executable. $(LD) carries the -l flags built from $(LIBS);
# $(LDFLAGS) is kept so flags passed from the environment still apply.
$(TARGET)CPU: $(CPU_OBJ)
	$(CC) $(CFLAGS) $^ -o $@ $(INC) $(LIB) $(LD) $(LDFLAGS)

# CPU objects: every source, including *.cu.cpp (stem "name.cu"), goes through $(CC).
# The object's directory is created first because sources may live one level
# below SRC_DIR.
$(CPU_BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp | $(CPU_BUILD_DIR)
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -c -o $@ $< $(INC)

# For GPU, we need to build the NVCC objects, the NVCC linked object, and the
# regular ones. Then, we link them all together.
$(TARGET)GPU: $(GPU_BUILD_DIR)/link.o $(GPU_GCC_OBJ) | $(GPU_BUILD_DIR)
	$(CC) -g -DCUDA $(CFLAGS) $(GPU_NVCC_OBJ) $^ -o $@ $(INC) $(LIB) $(LIB_GPU) $(LD) $(LD_GPU)

$(GPU_BUILD_DIR)/link.o: $(GPU_NVCC_OBJ) | $(GPU_BUILD_DIR)
	$(NVCC) --device-link $^ -o $@

$(GPU_BUILD_DIR)/%.cu.o: $(SRC_DIR)/%.cu.cpp | $(GPU_BUILD_DIR)
	@mkdir -p $(@D)
	$(NVCC) $(NVCC_FLAGS) -DCUDA -x cu --device-c -o $@ $< $(INC)

$(GPU_BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp | $(GPU_BUILD_DIR)
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -g -DCUDA -c -o $@ $< $(INC)

-include $(CPU_DEPS)
-include $(GPU_DEPS)

$(CPU_BUILD_DIR):
	mkdir -p $@

$(GPU_BUILD_DIR):
	mkdir -p $@

clean:
	rm -Rf $(BUILD_DIR) $(TARGET)CPU $(TARGET)GPU
|
Loading…
Reference in new issue