Skip to content

Commit

Permalink
add various GPU device query functions (chapel-lang#25945)
Browse files Browse the repository at this point in the history
This PR adds a `DeviceAttributes` type that includes a number of
parenless proc functions to query various attributes about the GPU. (for
example: max threads per block, device name, etc.)

This aims to resolve chapel-lang#23638

[Reviewed by @e-kayrakli]
  • Loading branch information
stonea authored Oct 21, 2024
2 parents cd66b24 + f58d81f commit 0dfd163
Show file tree
Hide file tree
Showing 14 changed files with 641 additions and 0 deletions.
117 changes: 117 additions & 0 deletions modules/standard/GPU.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -1216,4 +1216,121 @@ module GPU
gpuInputArr = gpuOutputArr;
}

/* Builds a DeviceAttributes record for the locale `loc` and returns it.
   The DeviceAttributes initializer halts when `loc` is not a GPU locale. */
proc deviceAttributes(loc) do
  return new DeviceAttributes(loc);

// Attribute selectors passed as the `attribute` argument of
// chpl_gpu_query_attribute. These are extern constants supplied by the
// runtime, so their numeric values are not fixed on the Chapel side.
extern const CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK : c_int;
extern const CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY : c_int;
extern const CHPL_GPU_ATTRIBUTE__WARP_SIZE : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_PITCH : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK : c_int;
extern const CHPL_GPU_ATTRIBUTE__CLOCK_RATE : c_int;
extern const CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT : c_int;
extern const CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT : c_int;
extern const CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT : c_int;
extern const CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT : c_int;
extern const CHPL_GPU_ATTRIBUTE__INTEGRATED : c_int;
extern const CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY : c_int;
extern const CHPL_GPU_ATTRIBUTE__COMPUTE_MODE : c_int;
// NOTE(review): no accessor in DeviceAttributes uses this selector, and a
// matching runtime definition was not found while reviewing -- confirm that
// every GPU backend defines it, or drop the declaration.
extern const CHPL_GPU_ATTRIBUTE__PROCESS : c_int;
extern const CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS : c_int;
extern const CHPL_GPU_ATTRIBUTE__ECC_ENABLED : c_int;
extern const CHPL_GPU_ATTRIBUTE__PCI_BUS_ID : c_int;
extern const CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID : c_int;
extern const CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE : c_int;
extern const CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH : c_int;
extern const CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR : c_int;
extern const CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR : c_int;
extern const CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR : c_int;
extern const CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR : c_int;
extern const CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY : c_int;
extern const CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD : c_int;
extern const CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS : c_int;
extern const CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS : c_int;
extern const CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES : c_int;
extern const CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST : c_int;

// Runtime entry point: returns the value of `attribute` (one of the
// CHPL_GPU_ATTRIBUTE__* selectors) for the device with index `dev`.
extern proc chpl_gpu_query_attribute(dev : c_int, attribute : c_int) : c_int;

/* Handle for querying properties of a GPU device.

   Every accessor is a parenless method that forwards to the runtime:
   `name` calls `chpl_gpu_name`, and each remaining accessor calls
   `chpl_gpu_query_attribute` with the matching CHPL_GPU_ATTRIBUTE__*
   selector and returns the result widened to `int`. */
record DeviceAttributes {
  // Index of the device handed to the runtime query functions.
  var gpuId : int;

  /* Creates the handle for `loc`; halts if `loc` is not a GPU locale. */
  proc init(loc) {
    // The message names the public entry point (deviceAttributes) so users
    // can find the failing call.
    if !loc.isGpu() then halt("deviceAttributes must be passed a gpu locale");
    // TODO: Should be loc.gpuId. Until then every query targets device 0,
    // whichever GPU locale was passed in.
    this.gpuId = 0;
  }

  /* Returns the device name reported by the runtime as a Chapel string. */
  proc name : string {
    extern proc chpl_gpu_name(dev : c_int, ref result : c_ptrConst(c_char));
    var ret : string;
    var tmp : c_ptrConst(c_char);

    // The runtime allocates the buffer and returns it through `tmp`.
    chpl_gpu_name(this.gpuId : c_int, tmp);
    try! {
      // Copy out of the runtime buffer; bytes that do not decode are escaped
      // rather than raising.
      ret = string.createCopyingBuffer(tmp, policy=decodePolicy.escape);
    }
    // `ret` owns its own copy, so the runtime buffer can be released here.
    deallocate(tmp);

    return ret;
  }

  proc maxThreadsPerBlock : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK);
  proc maxBlockDimX : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X);
  proc maxBlockDimY : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y);
  proc maxBlockDimZ : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z);
  // Original spelling, kept so existing callers continue to compile.
  proc MaxGridDimX : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X);
  // camelCase spelling consistent with maxGridDimY / maxGridDimZ.
  proc maxGridDimX : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X);
  proc maxGridDimY : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y);
  proc maxGridDimZ : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z);
  proc maxSharedMemoryPerBlock : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK);
  proc totalConstantMemory : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY);
  proc warpSize : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__WARP_SIZE);
  proc maxPitch : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_PITCH);
  proc maximumTexture1dWidth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH);
  proc maximumTexture2dWidth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH);
  proc maximumTexture2dHeight : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT);
  proc maximumTexture3dWidth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH);
  proc maximumTexture3dHeight : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT);
  proc maximumTexture3dDepth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH);
  proc maxRegistersPerBlock : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK);
  proc clockRate : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__CLOCK_RATE);
  proc textureAlignment : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT);
  // Original spelling, kept so existing callers continue to compile.
  proc texturePitch_alignment : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT);
  // camelCase spelling consistent with textureAlignment.
  proc texturePitchAlignment : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT);
  proc multiprocessorCount : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT);
  proc kernelExecTimeout : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT);
  proc integrated : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__INTEGRATED);
  proc canMapHostMemory : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY);
  proc computeMode : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__COMPUTE_MODE);
  proc concurrentKernels : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS);
  proc eccEnabled : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__ECC_ENABLED);
  proc pciBusId : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__PCI_BUS_ID);
  proc pciDeviceId : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID);
  proc memoryClockRate : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE);
  proc globalMemoryBusWidth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH);
  proc l2CacheSize : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE);
  proc maxThreadsPerMultiprocessor : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR);
  proc computeCapabilityMajor : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR);
  proc computeCapabilityMinor : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR);
  proc maxSharedMemoryPerMultiprocessor : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
  proc managedMemory : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY);
  proc multiGpuBoard : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD);
  proc pageableMemoryAccess : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS);
  proc concurrentManagedAccess : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS);
  proc pageableMemoryAccessUsesHostPageTables : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES);
  proc directManagedMemAccessFromHost : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST);
}
}
4 changes: 4 additions & 0 deletions runtime/include/chpl-gpu-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ GPU_CUB_WRAP(DECL_ONE_SORT_IMPL, keys)

#undef DECL_ONE_SORT_IMPL

// Backend hook: writes the name of device index `dev` into `resultBuffer`,
// a caller-provided buffer of `bufferSize` bytes.
void chpl_gpu_impl_name(int dev, char *resultBuffer, int bufferSize);

// Backend hook: returns the value of `attribute` (a CHPL_GPU_ATTRIBUTE__*
// selector) for device index `dev`.
int chpl_gpu_impl_query_attribute(int dev, int attribute);

#ifdef __cplusplus
}
#endif
Expand Down
48 changes: 48 additions & 0 deletions runtime/include/chpl-gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,54 @@ GPU_CUB_WRAP(DECL_ONE_SORT, keys);

#undef DECL_ONE_SORT

// Stores in `*result` a newly allocated buffer holding the name of device
// index `dev`; the caller is responsible for freeing that buffer.
void chpl_gpu_name(int dev, char **result);

// Attribute selectors accepted by chpl_gpu_query_attribute. Only the
// declarations live here; each GPU backend provides the definitions, so the
// numeric values are backend-specific.
extern const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK;
extern const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X;
extern const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y;
extern const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z;
extern const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X;
extern const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y;
extern const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z;
extern const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK;
extern const int CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY;
extern const int CHPL_GPU_ATTRIBUTE__WARP_SIZE;
extern const int CHPL_GPU_ATTRIBUTE__MAX_PITCH;
extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH;
extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH;
extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT;
extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH;
extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT;
extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH;
extern const int CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK;
extern const int CHPL_GPU_ATTRIBUTE__CLOCK_RATE;
extern const int CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT;
extern const int CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT;
extern const int CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT;
extern const int CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT;
extern const int CHPL_GPU_ATTRIBUTE__INTEGRATED;
extern const int CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY;
extern const int CHPL_GPU_ATTRIBUTE__COMPUTE_MODE;
extern const int CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS;
extern const int CHPL_GPU_ATTRIBUTE__ECC_ENABLED;
extern const int CHPL_GPU_ATTRIBUTE__PCI_BUS_ID;
extern const int CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID;
extern const int CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE;
extern const int CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH;
extern const int CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE;
extern const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR;
extern const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR;
extern const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR;
extern const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR;
extern const int CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY;
extern const int CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD;
extern const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS;
extern const int CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS;
extern const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES;
extern const int CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST;

// Returns the value of `attribute` (a CHPL_GPU_ATTRIBUTE__* selector) for
// device index `dev`.
int chpl_gpu_query_attribute(int dev, int attribute);

#else // HAS_GPU_LOCALE

// Provide a fallback for the chpl_assert_on_gpu function for non-GPU locales.
Expand Down
11 changes: 11 additions & 0 deletions runtime/src/chpl-gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1582,4 +1582,15 @@ GPU_CUB_WRAP(DEF_ONE_SORT, keys)

#undef DEF_ONE_SORT

// Allocates a BUFFER_SIZE-byte buffer, asks the backend to write the name of
// device index `dev` into it, and hands the buffer to the caller through
// `*result`. The caller owns the buffer and must free it.
void chpl_gpu_name(int dev, char **result) {
  const int BUFFER_SIZE = 0xFF;
  char* resultBuffer = (char *)chpl_mem_alloc(BUFFER_SIZE, CHPL_RT_MD_IO_BUFFER, __LINE__, 0);

  // The allocation is uninitialized. Start from an empty string so the
  // result is well defined even if the backend writes nothing.
  resultBuffer[0] = '\0';

  chpl_gpu_impl_name(dev, resultBuffer, BUFFER_SIZE);

  // Consumers treat the result as a C string with no explicit length, so
  // guarantee a terminator inside the allocation whatever the backend wrote.
  resultBuffer[BUFFER_SIZE - 1] = '\0';

  *result = resultBuffer;
}

// Public runtime entry point for attribute queries: forwards `dev` and
// `attribute` unchanged to the active backend and returns its answer.
int chpl_gpu_query_attribute(int dev, int attribute) {
  const int value = chpl_gpu_impl_query_attribute(dev, attribute);
  return value;
}

#endif
53 changes: 53 additions & 0 deletions runtime/src/gpu/amd/gpu-amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -511,5 +511,58 @@ void chpl_gpu_impl_host_unregister(void* var) {
ROCM_CALL(hipHostUnregister(var));
}

// AMD backend: asks HIP for the name of the device that index `dev` maps to
// (via indexToDeviceID) and has HIP write it into `resultBuffer`, passing
// `bufferSize` as the buffer length. The HIP call is wrapped in ROCM_CALL.
void chpl_gpu_impl_name(int dev, char *resultBuffer, int bufferSize) {
ROCM_CALL(hipDeviceGetName(resultBuffer, bufferSize, indexToDeviceID[dev]));
}

// AMD definitions of the runtime's attribute selectors: each one is the
// corresponding hipDeviceAttribute* enumerator, so a selector can be passed
// straight to hipDeviceGetAttribute.
const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK = hipDeviceAttributeMaxThreadsPerBlock;
const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X = hipDeviceAttributeMaxBlockDimX;
const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y = hipDeviceAttributeMaxBlockDimY;
const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z = hipDeviceAttributeMaxBlockDimZ;
const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X = hipDeviceAttributeMaxGridDimX;
const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y = hipDeviceAttributeMaxGridDimY;
const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z = hipDeviceAttributeMaxGridDimZ;
const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK = hipDeviceAttributeMaxSharedMemoryPerBlock;
const int CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY = hipDeviceAttributeTotalConstantMemory;
const int CHPL_GPU_ATTRIBUTE__WARP_SIZE = hipDeviceAttributeWarpSize;
const int CHPL_GPU_ATTRIBUTE__MAX_PITCH = hipDeviceAttributeMaxPitch;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH = hipDeviceAttributeMaxTexture1DWidth;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH = hipDeviceAttributeMaxTexture2DWidth;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT = hipDeviceAttributeMaxTexture2DHeight;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH = hipDeviceAttributeMaxTexture3DWidth;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT = hipDeviceAttributeMaxTexture3DHeight;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH = hipDeviceAttributeMaxTexture3DDepth;
const int CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK = hipDeviceAttributeMaxRegistersPerBlock;
const int CHPL_GPU_ATTRIBUTE__CLOCK_RATE = hipDeviceAttributeClockRate;
const int CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT = hipDeviceAttributeTextureAlignment;
const int CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT = hipDeviceAttributeTexturePitchAlignment;
const int CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT = hipDeviceAttributeMultiprocessorCount;
const int CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT = hipDeviceAttributeKernelExecTimeout;
const int CHPL_GPU_ATTRIBUTE__INTEGRATED = hipDeviceAttributeIntegrated;
const int CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY = hipDeviceAttributeCanMapHostMemory;
const int CHPL_GPU_ATTRIBUTE__COMPUTE_MODE = hipDeviceAttributeComputeMode;
const int CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS = hipDeviceAttributeConcurrentKernels;
const int CHPL_GPU_ATTRIBUTE__ECC_ENABLED = hipDeviceAttributeEccEnabled;
const int CHPL_GPU_ATTRIBUTE__PCI_BUS_ID = hipDeviceAttributePciBusId;
const int CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID = hipDeviceAttributePciDeviceId;
const int CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE = hipDeviceAttributeMemoryClockRate;
const int CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH = hipDeviceAttributeMemoryBusWidth;
const int CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE = hipDeviceAttributeL2CacheSize;
const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR = hipDeviceAttributeMaxThreadsPerMultiProcessor;
const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR = hipDeviceAttributeComputeCapabilityMajor;
const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR = hipDeviceAttributeComputeCapabilityMinor;
const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = hipDeviceAttributeMaxSharedMemoryPerMultiprocessor;
const int CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY = hipDeviceAttributeManagedMemory;
const int CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD = hipDeviceAttributeIsMultiGpuBoard;
const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS = hipDeviceAttributePageableMemoryAccess;
const int CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS = hipDeviceAttributeConcurrentManagedAccess;
const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = hipDeviceAttributePageableMemoryAccessUsesHostPageTables;
const int CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = hipDeviceAttributeDirectManagedMemAccessFromHost;

// AMD backend: reads `attribute` for the device that index `dev` maps to
// (via indexToDeviceID) using hipDeviceGetAttribute and returns the value
// HIP stored. The HIP call is wrapped in ROCM_CALL.
int chpl_gpu_impl_query_attribute(int dev, int attribute) {
  int attributeValue;
  ROCM_CALL(hipDeviceGetAttribute(&attributeValue, attribute, indexToDeviceID[dev]));
  return attributeValue;
}

#endif // HAS_GPU_LOCALE
55 changes: 55 additions & 0 deletions runtime/src/gpu/cpu/gpu-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,59 @@ void* chpl_gpu_impl_host_register(void* var, size_t size) { return var; }
void chpl_gpu_impl_host_unregister(void* var) { }

#undef DEF_ONE_SORT

// CPU-as-device backend: there is no real device to query, so every device
// index reports the same fixed name. The copy is bounded by `bufferSize` and
// always NUL-terminated; the name is truncated if the buffer is too small.
// Nothing is written when `resultBuffer` is NULL or `bufferSize` is not
// positive.
void chpl_gpu_impl_name(int dev, char *resultBuffer, int bufferSize) {
  static const char deviceName[] = "chapel-cpu-as-device-gpu";
  size_t copyLength = sizeof(deviceName) - 1;  // excludes the terminator
  size_t capacity;

  (void)dev;  // the name does not depend on the device index

  if (resultBuffer == NULL || bufferSize <= 0) {
    return;
  }

  // Reserve one byte for the terminator.
  capacity = (size_t)bufferSize - 1;
  if (copyLength > capacity) {
    copyLength = capacity;
  }

  memcpy(resultBuffer, deviceName, copyLength);
  resultBuffer[copyLength] = '\0';
}

// CPU-as-device definitions of the runtime's attribute selectors. There is
// no vendor enum to map to in this mode, so the selectors are simply numbered
// consecutively from 0.
const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK = 0;
const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X = 1;
const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y = 2;
const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z = 3;
const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X = 4;
const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y = 5;
const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z = 6;
const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK = 7;
const int CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY = 8;
const int CHPL_GPU_ATTRIBUTE__WARP_SIZE = 9;
const int CHPL_GPU_ATTRIBUTE__MAX_PITCH = 10;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH = 11;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH = 12;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT = 13;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH = 14;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT = 15;
const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH = 16;
const int CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK = 17;
const int CHPL_GPU_ATTRIBUTE__CLOCK_RATE = 18;
const int CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT = 19;
const int CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT = 20;
const int CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT = 21;
const int CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT = 22;
const int CHPL_GPU_ATTRIBUTE__INTEGRATED = 23;
const int CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY = 24;
const int CHPL_GPU_ATTRIBUTE__COMPUTE_MODE = 25;
const int CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS = 26;
const int CHPL_GPU_ATTRIBUTE__ECC_ENABLED = 27;
const int CHPL_GPU_ATTRIBUTE__PCI_BUS_ID = 28;
const int CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID = 29;
const int CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE = 30;
const int CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH = 31;
const int CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE = 32;
const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR = 33;
const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR = 34;
const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR = 35;
const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 36;
const int CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY = 37;
const int CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD = 38;
const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS = 39;
const int CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS = 40;
const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 41;
const int CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 42;

// CPU-as-device backend: attribute queries are not supported in this mode.
// Every call emits a runtime warning and returns -1, regardless of `dev` and
// `attribute`.
int chpl_gpu_impl_query_attribute(int dev, int attribute) {
  const int unsupportedResult = -1;

  chpl_warning("querying gpu attributes is currently unsupported in cpu-as-device mode.", 0, 0);
  return unsupportedResult;
}

#endif // HAS_GPU_LOCALE
Loading

0 comments on commit 0dfd163

Please sign in to comment.