diff --git a/roofit/batchcompute/src/RooBatchCompute.cu b/roofit/batchcompute/src/RooBatchCompute.cu index 76f22c5176d07..11ab959a8d6ef 100644 --- a/roofit/batchcompute/src/RooBatchCompute.cu +++ b/roofit/batchcompute/src/RooBatchCompute.cu @@ -23,6 +23,7 @@ This file contains the code for cuda computations using the RooBatchCompute libr #include "CudaInterface.h" #include +#include #include #include #include @@ -299,6 +300,11 @@ ReduceNLLOutput RooBatchComputeClass::reduceNLL(RooBatchCompute::Config const &c cudaStream_t stream = *cfg.cudaStream(); constexpr int shMemSize = 2 * blockSize * sizeof(double); + for (auto span : {probas, weights, offsetProbas}) { + cudaPointerAttributes attr; + assert(span.size() == 0 || span.data() == nullptr || (cudaPointerGetAttributes(&attr, span.data()) == cudaSuccess && attr.type == cudaMemoryTypeDevice)); + } + nllSumKernel<<>>( probas.data(), weights.size() == 1 ? nullptr : weights.data(), offsetProbas.empty() ? nullptr : offsetProbas.data(), probas.size(), devOut.data());