diff --git a/source/module_hsolver/kernels/cuda/math_kernel_op.cu b/source/module_hsolver/kernels/cuda/math_kernel_op.cu index 930ac0b3ce..6185433895 100644 --- a/source/module_hsolver/kernels/cuda/math_kernel_op.cu +++ b/source/module_hsolver/kernels/cuda/math_kernel_op.cu @@ -12,7 +12,7 @@ namespace hsolver { const int warp_size = 32; -//const unsigned int full_mask = 0xffffffff; +// const unsigned int full_mask = 0xffffffff; const int thread_per_block = 256; } @@ -65,11 +65,11 @@ void destoryBLAShandle(){ } } -template -__forceinline__ __device__ void warp_reduce(FPTYPE& val) { - for (int offset = 16; offset > 0; offset >>= 1) - val += __shfl_down_sync(full_mask, val, offset); -} +// template +// __forceinline__ __device__ void warp_reduce(FPTYPE& val) { +// for (int offset = 16; offset > 0; offset >>= 1) +// val += __shfl_down_sync(full_mask, val, offset); +// } template __global__ void line_minimize_with_block(