Adapt hip_device_buffers for multigpu
G-071 committed Aug 21, 2023
1 parent 1026cd5 commit 95c7722
Showing 2 changed files with 12 additions and 29 deletions.
include/cuda_buffer_util.hpp (2 changes: 1 addition & 1 deletion)
@@ -136,7 +136,7 @@ template <typename T, typename Host_Allocator, std::enable_if_t<std::is_trivial<T>::value, int> = 0>
 struct cuda_aggregated_device_buffer {
   T *device_side_buffer;
   size_t number_of_elements;
-  explicit cuda_aggregated_device_buffer(size_t number_of_elements, Host_Allocator &alloc)
+  cuda_aggregated_device_buffer(size_t number_of_elements, Host_Allocator &alloc)
       : number_of_elements(number_of_elements), alloc(alloc) {
     device_side_buffer =
         alloc.allocate(number_of_elements);
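The only change in this file drops the explicit specifier from the two-argument constructor, matching the HIP counterpart below. A minimal sketch of the language-level effect, using stand-in types rather than the real CPPuddle classes (dummy_allocator and aggregated_buffer are illustrative only):

#include <cstddef>

// Stand-in allocator so the snippet is self-contained; the real Host_Allocator
// comes from CPPuddle's aggregation machinery.
template <typename T> struct dummy_allocator {
  T *allocate(std::size_t n) { return new T[n]; }
  void deallocate(T *p, std::size_t) { delete[] p; }
};

// Simplified stand-in for cuda_aggregated_device_buffer, reduced to the parts
// relevant for the constructor change.
template <typename T, typename Host_Allocator> struct aggregated_buffer {
  T *device_side_buffer;
  std::size_t number_of_elements;
  Host_Allocator &alloc;
  aggregated_buffer(std::size_t number_of_elements, Host_Allocator &alloc)
      : number_of_elements(number_of_elements), alloc(alloc) {
    device_side_buffer = alloc.allocate(number_of_elements);
  }
  ~aggregated_buffer() { alloc.deallocate(device_side_buffer, number_of_elements); }
};

int main() {
  dummy_allocator<float> alloc;
  // Copy-list-initialization: rejected while the constructor was explicit,
  // accepted now that the explicit specifier is gone.
  aggregated_buffer<float, dummy_allocator<float>> buf = {128, alloc};
  return 0;
}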
include/hip_buffer_util.hpp (39 changes: 11 additions & 28 deletions)
@@ -22,7 +22,6 @@ template <class T> struct hip_pinned_allocator {
   template <class U>
   explicit hip_pinned_allocator(hip_pinned_allocator<U> const &) noexcept {}
   T *allocate(std::size_t n) {
-    hipSetDevice(get_device_id());
     T *data;
     // hipError_t error =
     //     hipMallocHost(reinterpret_cast<void **>(&data), n * sizeof(T));
@@ -69,7 +68,6 @@ template <class T> struct hip_device_allocator {
   template <class U>
   explicit hip_device_allocator(hip_device_allocator<U> const &) noexcept {}
   T *allocate(std::size_t n) {
-    hipSetDevice(get_device_id());
     T *data;
     hipError_t error = hipMalloc(&data, n * sizeof(T));
     if (error != hipSuccess) {
@@ -115,53 +113,38 @@ using recycle_allocator_hip_device =
 // TODO Is this even required? (cuda version should work fine...)
 template <typename T, std::enable_if_t<std::is_trivial<T>::value, int> = 0>
 struct hip_device_buffer {
-  size_t gpu_id{0};
+  recycle_allocator_hip_device<T> allocator;
   T *device_side_buffer;
   size_t number_of_elements;
-  explicit hip_device_buffer(size_t number_of_elements)
-      : number_of_elements(number_of_elements) {
-    device_side_buffer =
-        recycle_allocator_hip_device<T>{}.allocate(number_of_elements);
-  }
-  explicit hip_device_buffer(size_t number_of_elements, size_t gpu_id)
-      : gpu_id(gpu_id), number_of_elements(number_of_elements), set_id(true) {
+
+  hip_device_buffer(size_t number_of_elements, size_t device_id)
+      : allocator{device_id}, number_of_elements(number_of_elements) {
+    assert(device_id < max_number_gpus);
     device_side_buffer =
         recycle_allocator_hip_device<T>{}.allocate(number_of_elements);
   }
   ~hip_device_buffer() {
-    recycle_allocator_hip_device<T>{}.deallocate(device_side_buffer,
-                                                 number_of_elements);
+    allocator.deallocate(device_side_buffer, number_of_elements);
   }
   // not yet implemented
   hip_device_buffer(hip_device_buffer const &other) = delete;
   hip_device_buffer operator=(hip_device_buffer const &other) = delete;
   hip_device_buffer(hip_device_buffer const &&other) = delete;
   hip_device_buffer operator=(hip_device_buffer const &&other) = delete;
 
-private:
-  bool set_id{false};
 };
 
 template <typename T, typename Host_Allocator, std::enable_if_t<std::is_trivial<T>::value, int> = 0>
 struct hip_aggregated_device_buffer {
-  size_t gpu_id{0};
   T *device_side_buffer;
   size_t number_of_elements;
-  explicit hip_aggregated_device_buffer(size_t number_of_elements)
-      : number_of_elements(number_of_elements) {
-    device_side_buffer =
-        recycle_allocator_hip_device<T>{}.allocate(number_of_elements);
-  }
-  explicit hip_aggregated_device_buffer(size_t number_of_elements, size_t gpu_id, Host_Allocator &alloc)
-      : gpu_id(gpu_id), number_of_elements(number_of_elements), set_id(true), alloc(alloc) {
-    assert(gpu_id == 0);
+  hip_aggregated_device_buffer(size_t number_of_elements, Host_Allocator &alloc)
+      : number_of_elements(number_of_elements), alloc(alloc) {
     device_side_buffer =
         alloc.allocate(number_of_elements);
   }
   ~hip_aggregated_device_buffer() {
-    assert(gpu_id == 0);
-    alloc.deallocate(device_side_buffer,
-                     number_of_elements);
+    alloc.deallocate(device_side_buffer, number_of_elements);
   }
   // not yet implemented
   hip_aggregated_device_buffer(hip_aggregated_device_buffer const &other) = delete;
@@ -170,8 +153,8 @@ struct hip_aggregated_device_buffer {
   hip_aggregated_device_buffer operator=(hip_aggregated_device_buffer const &&other) = delete;
 
 private:
-  bool set_id{false};
-  Host_Allocator &alloc;
+  Host_Allocator &alloc; // will stay valid for the entire aggregation region and hence
+                         // for the entire lifetime of this buffer
 };
 
 } // end namespace recycler
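A minimal multi-GPU usage sketch for the adapted hip_device_buffer, assuming the header above is available on the include path; scatter_work, number_of_gpus, and the double element type are illustrative choices, and kernel launches and copies are omitted:

#include <cstddef>
#include "hip_buffer_util.hpp" // CPPuddle header; exact include path is an assumption

// One device buffer per GPU, each tied to its device id through the new
// (number_of_elements, device_id) constructor.
void scatter_work(std::size_t number_of_elements, std::size_t number_of_gpus) {
  for (std::size_t device_id = 0; device_id < number_of_gpus; device_id++) {
    // device_id has to stay below max_number_gpus (checked by the assert above)
    recycler::hip_device_buffer<double> buf(number_of_elements, device_id);
    // ... enqueue kernels / hipMemcpyAsync calls using buf.device_side_buffer ...
  } // destructor returns the allocation to the recycling allocator for device_id
}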
