Skip to content

Commit

Permalink
mv function implement to cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
NeoZhangJianyu committed Jun 29, 2024
1 parent 99937ef commit d8e29ad
Show file tree
Hide file tree
Showing 2 changed files with 306 additions and 270 deletions.
283 changes: 282 additions & 1 deletion ggml-sycl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,285 @@ static ggml_sycl_device_info ggml_sycl_init() try {
ggml_sycl_device_info &ggml_sycl_info() {
static ggml_sycl_device_info info = ggml_sycl_init();
return info;
}
}

//--sycl_device_mgr--

sycl_device_mgr::sycl_device_mgr(
ggml_sycl_backend_device_filter device_filter) {
switch (device_filter) {
case SYCL_DEVICES_TOP_LEVEL_ZERO:
detect_sycl_gpu_list_with_max_cu();
create_context_for_group_gpus();
break;
case SYCL_ALL_DEVICES:
detect_all_sycl_device_list();
create_context_for_devices();
break;
case SYCL_VISIBLE_DEVICES:
detect_sycl_visible_device_list();
create_context_for_devices();
break;
default:
std::cerr << "sycl_device_mgr: Invalid device_filter " << device_filter
<< std::endl;
}
init_allow_devices();
}

/*
Bind all gpus in same host with same context, for better performance in
device-to-device copy in the future.
*/
void sycl_device_mgr::create_context_for_group_gpus() {
sycl::context ctx = sycl::context(devices);
assert(device_ids.size() > 0);
first_queue = dpct::get_current_device().create_queue(ctx, devices[0]);
sycl::context ctx0 = first_queue->get_context();
for (int i = 0; i < device_ids.size(); i++) {
ctxs.push_back(ctx0);
}
}

sycl::queue *sycl_device_mgr::create_queue_for_device(sycl::context &ctx,
sycl::device &device) {
dpct::select_device(dpct::dev_mgr::instance().get_device_id(device));
auto res = dpct::get_current_device().create_queue(ctx, device);
return res;
}

sycl::queue *sycl_device_mgr::create_queue_for_device_id(int device_id) {
int i = get_device_index(device_id);
sycl::context ctx = ctxs[i];
sycl::device device = dpct::dev_mgr::instance().get_device(device_id);
;
return create_queue_for_device(ctx, device);
}

int sycl_device_mgr::get_device_index(int device_id) {
for (int i = 0; i < device_ids.size(); i++) {
if (device_ids[i] == device_id)
return i;
}
return -1;
}

void sycl_device_mgr::create_context_for_devices() {
for (int i = 0; i < device_ids.size(); i++) {
sycl::context ctx = sycl::context(devices[i]);
ctxs.push_back(ctx);
}
}

void sycl_device_mgr::init_allow_devices() {
device_list = "";
for (size_t i = 0; i < device_ids.size(); ++i) {
device_list += std::to_string(device_ids[i]);
device_list += ",";
}
if (device_list.length() > 1) {
device_list.pop_back();
}
}

bool sycl_device_mgr::is_allowed_device(int device_id) {
return std::find(device_ids.begin(), device_ids.end(), device_id) !=
device_ids.end();
}

void sycl_device_mgr::detect_all_sycl_device_list() try {
int device_count = dpct::dev_mgr::instance().device_count();

for (int id = 0; id < device_count; id++) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
device_ids.push_back(id);
devices.push_back(device);
dpct::device_info prop;
dpct::get_device_info(prop, device);
work_group_sizes.push_back(prop.get_max_work_group_size());
max_compute_units.push_back(prop.get_max_compute_units());
}
return;
} catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}

void sycl_device_mgr::detect_sycl_visible_device_list() try {
std::vector<int> sycl_devices = get_sycl_visible_devices();
int device_count = dpct::dev_mgr::instance().device_count();

for (int i = 0; i < sycl_devices.size(); i++) {
int id = sycl_devices[i];
if (id >= device_count) {
std::cerr << __func__ << ": invalid device_id:" << id
<< " from GGML_SYCL_VISIBLE_DEVICES="
<< getenv("GGML_SYCL_VISIBLE_DEVICES")
<< ", available IDs: ";
if (device_count > 1) {
std::cerr << "[0, " << device_count - 1 << "]";
} else if (device_count == 1) {
std::cerr << "[0]";
} else {
std::cerr << "[]";
}
std::cerr << std::endl;
}
sycl::device device = dpct::dev_mgr::instance().get_device(id);
device_ids.push_back(id);
devices.push_back(device);
dpct::device_info prop;
dpct::get_device_info(prop, device);
work_group_sizes.push_back(prop.get_max_work_group_size());
max_compute_units.push_back(prop.get_max_compute_units());
}
return;
} catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}

/*
Use all GPUs with same top max compute units
*/
void sycl_device_mgr::detect_sycl_gpu_list_with_max_cu() try {
int device_count = dpct::dev_mgr::instance().device_count();
int local_max_compute_units = 0;
for (int id = 0; id < device_count; id++) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
if (!device.is_gpu())
continue;
dpct::device_info prop;
dpct::get_device_info(prop, device);
if (local_max_compute_units < prop.get_max_compute_units())
local_max_compute_units = prop.get_max_compute_units();
}

for (int id = 0; id < device_count; id++) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
if (!device.is_gpu())
continue;
dpct::device_info prop;
dpct::get_device_info(prop, device);
if (local_max_compute_units == prop.get_max_compute_units() &&
is_ext_oneapi_device(device)) {
device_ids.push_back(id);
devices.push_back(device);
work_group_sizes.push_back(prop.get_max_work_group_size());
max_compute_units.push_back(prop.get_max_compute_units());
}
}
return;
} catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}

int sycl_device_mgr::get_device_count() { return (int)device_ids.size(); }

bool sycl_device_mgr::is_ext_oneapi_device(const sycl::device &dev) {
sycl::backend dev_backend = dev.get_backend();
if (dev_backend == sycl::backend::ext_oneapi_level_zero ||
dev_backend == sycl::backend::ext_oneapi_cuda ||
dev_backend == sycl::backend::ext_oneapi_hip)
return true;
return false;
}
//--sycl_device_mgr--

//--ggml_sycl_device_info--
void ggml_sycl_device_info::print_gpu_device_list() {
GGML_ASSERT(device_mgr);

char *hint = NULL;
if (oneapi_device_selector_existed && sycl_visible_devices_existed) {
hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s and "
"GGML_SYCL_VISIBLE_DEVICES=%s\n";
fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
getenv("ONEAPI_DEVICE_SELECTOR"),
getenv("GGML_SYCL_VISIBLE_DEVICES"));
} else if (oneapi_device_selector_existed) {
hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s\n";
fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
getenv("ONEAPI_DEVICE_SELECTOR"));
} else if (sycl_visible_devices_existed) {
hint = "detect %d SYCL devices:[%s] by GGML_SYCL_VISIBLE_DEVICES=%s\n";
fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
getenv("GGML_SYCL_VISIBLE_DEVICES"));
} else {
hint = "detect %d SYCL level-zero GPUs:[%s] with top Max compute "
"units:%d, to use any SYCL devices, set/export "
"GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n";
fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
device_mgr->max_compute_units[0]);
}
}

int ggml_sycl_device_info::work_group_size(int device_id) {
GGML_ASSERT(device_mgr);
return device_mgr->work_group_sizes[device_id];
}

void ggml_sycl_device_info::refresh_device() {
oneapi_device_selector_existed = env_existed("ONEAPI_DEVICE_SELECTOR");
sycl_visible_devices_existed = env_existed("GGML_SYCL_VISIBLE_DEVICES");
if (!device_mgr)
delete device_mgr;

if (sycl_visible_devices_existed) {
device_mgr = new sycl_device_mgr(SYCL_VISIBLE_DEVICES);
} else if (oneapi_device_selector_existed) {
device_mgr = new sycl_device_mgr(SYCL_ALL_DEVICES);
} else {
device_mgr = new sycl_device_mgr(SYCL_DEVICES_TOP_LEVEL_ZERO);
}

device_count = device_mgr->get_device_count();

int64_t total_vram = 0;

for (int i = 0; i < device_count; ++i) {
int id = get_device_id(i);
devices[id].vmm = 0;
dpct::device_info prop;
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
prop, dpct::dev_mgr::instance().get_device(id))));

default_tensor_split[i] =
total_vram; // continue data, so use device index
total_vram += prop.get_global_mem_size();

devices[id].cc =
100 * prop.get_major_version() + 10 * prop.get_minor_version();
}

for (int i = 0; i < device_count; ++i) {
default_tensor_split[i] /=
total_vram; // continue data, so use device index
}

print_gpu_device_list();
}

bool ggml_sycl_device_info::is_allowed_device(int device_id) {
return device_mgr->is_allowed_device(device_id);
}

const char *ggml_sycl_device_info::devices_list() {
return device_mgr->device_list.c_str();
}

int ggml_sycl_device_info::get_device_id(int device_index) {
if (device_index < device_mgr->device_ids.size()) {
return device_mgr->device_ids.at(device_index);
} else {
std::cerr << __func__ << ":SYCL device:" << device_index
<< " is out of range:[" << devices_list() << "]" << std::endl;
std::exit(1);
}
}

//--ggml_sycl_device_info--
Loading

0 comments on commit d8e29ad

Please sign in to comment.