diff --git a/CHANGELOG.md b/CHANGELOG.md index 96ad848dd6e..0d3ba84ed84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ * Fix some bad triangle generation in TriangleMesh::SimplifyQuadricDecimation * Fix printing of tensor in gpu and add validation check for bounds of axis-aligned bounding box (PR #6444) * Python 3.11 support. bump pybind11 v2.6.2 -> v2.11.1 +* Check for support of CUDA Memory Pools at runtime (#4679) ## 0.13 diff --git a/cpp/open3d/core/CUDAUtils.cpp b/cpp/open3d/core/CUDAUtils.cpp index 630cbf38938..1da331035e7 100644 --- a/cpp/open3d/core/CUDAUtils.cpp +++ b/cpp/open3d/core/CUDAUtils.cpp @@ -108,6 +108,27 @@ void AssertCUDADeviceAvailable(const Device& device) { } } +bool SupportsMemoryPools(const Device& device) { +#if defined(BUILD_CUDA_MODULE) && (CUDART_VERSION >= 11020) + if (device.IsCUDA()) { + int driverVersion = 0; + int deviceSupportsMemoryPools = 0; + OPEN3D_CUDA_CHECK(cudaDriverGetVersion(&driverVersion)); + if (driverVersion >= + 11020) { // avoid invalid value error in cudaDeviceGetAttribute + OPEN3D_CUDA_CHECK(cudaDeviceGetAttribute( + &deviceSupportsMemoryPools, cudaDevAttrMemoryPoolsSupported, + device.GetID())); + } + return !!deviceSupportsMemoryPools; + } else { + return false; + } +#else + return false; +#endif +} + #ifdef BUILD_CUDA_MODULE int GetDevice() { int device; diff --git a/cpp/open3d/core/CUDAUtils.h b/cpp/open3d/core/CUDAUtils.h index 2996cd0987c..15f87be040d 100644 --- a/cpp/open3d/core/CUDAUtils.h +++ b/cpp/open3d/core/CUDAUtils.h @@ -255,6 +255,13 @@ void AssertCUDADeviceAvailable(int device_id); /// \param device The device to be checked. void AssertCUDADeviceAvailable(const Device& device); +/// Checks if the CUDA device support Memory Pools +/// used by the Stream Ordered Memory Allocator, +/// see +/// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html +/// \param device The device to be checked. +bool SupportsMemoryPools(const Device& device); + #ifdef BUILD_CUDA_MODULE int GetDevice(); diff --git a/cpp/open3d/core/MemoryManagerCUDA.cpp b/cpp/open3d/core/MemoryManagerCUDA.cpp index 3cc2f4730bc..835ea550cfe 100644 --- a/cpp/open3d/core/MemoryManagerCUDA.cpp +++ b/cpp/open3d/core/MemoryManagerCUDA.cpp @@ -20,8 +20,12 @@ void* MemoryManagerCUDA::Malloc(size_t byte_size, const Device& device) { void* ptr; if (device.IsCUDA()) { #if CUDART_VERSION >= 11020 - OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast(&ptr), byte_size, - cuda::GetStream())); + if (cuda::SupportsMemoryPools(device)) { + OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast(&ptr), + byte_size, cuda::GetStream())); + } else { + OPEN3D_CUDA_CHECK(cudaMalloc(static_cast(&ptr), byte_size)); + } #else OPEN3D_CUDA_CHECK(cudaMalloc(static_cast(&ptr), byte_size)); #endif @@ -38,7 +42,11 @@ void MemoryManagerCUDA::Free(void* ptr, const Device& device) { if (device.IsCUDA()) { if (ptr && IsCUDAPointer(ptr, device)) { #if CUDART_VERSION >= 11020 - OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream())); + if (cuda::SupportsMemoryPools(device)) { + OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream())); + } else { + OPEN3D_CUDA_CHECK(cudaFree(ptr)); + } #else OPEN3D_CUDA_CHECK(cudaFree(ptr)); #endif