mv function implement to cpp

arthw · Jun 29, 2024 · d8e29ad · d8e29ad
1 parent 99937ef
commit d8e29ad
Show file tree

Hide file tree

Showing 2 changed files with 306 additions and 270 deletions.
diff --git a/ggml-sycl/common.cpp b/ggml-sycl/common.cpp
@@ -213,4 +213,285 @@ static ggml_sycl_device_info ggml_sycl_init() try {
 ggml_sycl_device_info &ggml_sycl_info() {
     static ggml_sycl_device_info info = ggml_sycl_init();
     return info;
-}
+}
+
+//--sycl_device_mgr--
+
+sycl_device_mgr::sycl_device_mgr(
+    ggml_sycl_backend_device_filter device_filter) {
+    switch (device_filter) {
+    case SYCL_DEVICES_TOP_LEVEL_ZERO:
+        detect_sycl_gpu_list_with_max_cu();
+        create_context_for_group_gpus();
+        break;
+    case SYCL_ALL_DEVICES:
+        detect_all_sycl_device_list();
+        create_context_for_devices();
+        break;
+    case SYCL_VISIBLE_DEVICES:
+        detect_sycl_visible_device_list();
+        create_context_for_devices();
+        break;
+    default:
+        std::cerr << "sycl_device_mgr: Invalid device_filter " << device_filter
+                  << std::endl;
+    }
+    init_allow_devices();
+}
+
+/*
+Bind all gpus in same host with same context, for better performance in
+device-to-device copy in the future.
+*/
+void sycl_device_mgr::create_context_for_group_gpus() {
+    sycl::context ctx = sycl::context(devices);
+    assert(device_ids.size() > 0);
+    first_queue = dpct::get_current_device().create_queue(ctx, devices[0]);
+    sycl::context ctx0 = first_queue->get_context();
+    for (int i = 0; i < device_ids.size(); i++) {
+        ctxs.push_back(ctx0);
+    }
+}
+
+sycl::queue *sycl_device_mgr::create_queue_for_device(sycl::context &ctx,
+                                                      sycl::device &device) {
+    dpct::select_device(dpct::dev_mgr::instance().get_device_id(device));
+    auto res = dpct::get_current_device().create_queue(ctx, device);
+    return res;
+}
+
+sycl::queue *sycl_device_mgr::create_queue_for_device_id(int device_id) {
+    int i = get_device_index(device_id);
+    sycl::context ctx = ctxs[i];
+    sycl::device device = dpct::dev_mgr::instance().get_device(device_id);
+    ;
+    return create_queue_for_device(ctx, device);
+}
+
+int sycl_device_mgr::get_device_index(int device_id) {
+    for (int i = 0; i < device_ids.size(); i++) {
+        if (device_ids[i] == device_id)
+            return i;
+    }
+    return -1;
+}
+
+void sycl_device_mgr::create_context_for_devices() {
+    for (int i = 0; i < device_ids.size(); i++) {
+        sycl::context ctx = sycl::context(devices[i]);
+        ctxs.push_back(ctx);
+    }
+}
+
+void sycl_device_mgr::init_allow_devices() {
+    device_list = "";
+    for (size_t i = 0; i < device_ids.size(); ++i) {
+        device_list += std::to_string(device_ids[i]);
+        device_list += ",";
+    }
+    if (device_list.length() > 1) {
+        device_list.pop_back();
+    }
+}
+
+bool sycl_device_mgr::is_allowed_device(int device_id) {
+    return std::find(device_ids.begin(), device_ids.end(), device_id) !=
+           device_ids.end();
+}
+
+void sycl_device_mgr::detect_all_sycl_device_list() try {
+    int device_count = dpct::dev_mgr::instance().device_count();
+
+    for (int id = 0; id < device_count; id++) {
+        sycl::device device = dpct::dev_mgr::instance().get_device(id);
+        device_ids.push_back(id);
+        devices.push_back(device);
+        dpct::device_info prop;
+        dpct::get_device_info(prop, device);
+        work_group_sizes.push_back(prop.get_max_work_group_size());
+        max_compute_units.push_back(prop.get_max_compute_units());
+    }
+    return;
+} catch (sycl::exception const &exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+              << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
+}
+
+void sycl_device_mgr::detect_sycl_visible_device_list() try {
+    std::vector<int> sycl_devices = get_sycl_visible_devices();
+    int device_count = dpct::dev_mgr::instance().device_count();
+
+    for (int i = 0; i < sycl_devices.size(); i++) {
+        int id = sycl_devices[i];
+        if (id >= device_count) {
+            std::cerr << __func__ << ": invalid device_id:" << id
+                      << " from GGML_SYCL_VISIBLE_DEVICES="
+                      << getenv("GGML_SYCL_VISIBLE_DEVICES")
+                      << ", available IDs: ";
+            if (device_count > 1) {
+                std::cerr << "[0, " << device_count - 1 << "]";
+            } else if (device_count == 1) {
+                std::cerr << "[0]";
+            } else {
+                std::cerr << "[]";
+            }
+            std::cerr << std::endl;
+        }
+        sycl::device device = dpct::dev_mgr::instance().get_device(id);
+        device_ids.push_back(id);
+        devices.push_back(device);
+        dpct::device_info prop;
+        dpct::get_device_info(prop, device);
+        work_group_sizes.push_back(prop.get_max_work_group_size());
+        max_compute_units.push_back(prop.get_max_compute_units());
+    }
+    return;
+} catch (sycl::exception const &exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+              << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
+}
+
+/*
+Use all GPUs with same top max compute units
+*/
+void sycl_device_mgr::detect_sycl_gpu_list_with_max_cu() try {
+    int device_count = dpct::dev_mgr::instance().device_count();
+    int local_max_compute_units = 0;
+    for (int id = 0; id < device_count; id++) {
+        sycl::device device = dpct::dev_mgr::instance().get_device(id);
+        if (!device.is_gpu())
+            continue;
+        dpct::device_info prop;
+        dpct::get_device_info(prop, device);
+        if (local_max_compute_units < prop.get_max_compute_units())
+            local_max_compute_units = prop.get_max_compute_units();
+    }
+
+    for (int id = 0; id < device_count; id++) {
+        sycl::device device = dpct::dev_mgr::instance().get_device(id);
+        if (!device.is_gpu())
+            continue;
+        dpct::device_info prop;
+        dpct::get_device_info(prop, device);
+        if (local_max_compute_units == prop.get_max_compute_units() &&
+            is_ext_oneapi_device(device)) {
+            device_ids.push_back(id);
+            devices.push_back(device);
+            work_group_sizes.push_back(prop.get_max_work_group_size());
+            max_compute_units.push_back(prop.get_max_compute_units());
+        }
+    }
+    return;
+} catch (sycl::exception const &exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+              << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
+}
+
+int sycl_device_mgr::get_device_count() { return (int)device_ids.size(); }
+
+bool sycl_device_mgr::is_ext_oneapi_device(const sycl::device &dev) {
+    sycl::backend dev_backend = dev.get_backend();
+    if (dev_backend == sycl::backend::ext_oneapi_level_zero ||
+        dev_backend == sycl::backend::ext_oneapi_cuda ||
+        dev_backend == sycl::backend::ext_oneapi_hip)
+        return true;
+    return false;
+}
+//--sycl_device_mgr--
+
+//--ggml_sycl_device_info--
+void ggml_sycl_device_info::print_gpu_device_list() {
+    GGML_ASSERT(device_mgr);
+
+    char *hint = NULL;
+    if (oneapi_device_selector_existed && sycl_visible_devices_existed) {
+        hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s and "
+               "GGML_SYCL_VISIBLE_DEVICES=%s\n";
+        fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
+                getenv("ONEAPI_DEVICE_SELECTOR"),
+                getenv("GGML_SYCL_VISIBLE_DEVICES"));
+    } else if (oneapi_device_selector_existed) {
+        hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s\n";
+        fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
+                getenv("ONEAPI_DEVICE_SELECTOR"));
+    } else if (sycl_visible_devices_existed) {
+        hint = "detect %d SYCL devices:[%s] by GGML_SYCL_VISIBLE_DEVICES=%s\n";
+        fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
+                getenv("GGML_SYCL_VISIBLE_DEVICES"));
+    } else {
+        hint = "detect %d SYCL level-zero GPUs:[%s] with top Max compute "
+               "units:%d, to use any SYCL devices, set/export "
+               "GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n";
+        fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
+                device_mgr->max_compute_units[0]);
+    }
+}
+
+int ggml_sycl_device_info::work_group_size(int device_id) {
+    GGML_ASSERT(device_mgr);
+    return device_mgr->work_group_sizes[device_id];
+}
+
+void ggml_sycl_device_info::refresh_device() {
+    oneapi_device_selector_existed = env_existed("ONEAPI_DEVICE_SELECTOR");
+    sycl_visible_devices_existed = env_existed("GGML_SYCL_VISIBLE_DEVICES");
+    if (!device_mgr)
+        delete device_mgr;
+
+    if (sycl_visible_devices_existed) {
+        device_mgr = new sycl_device_mgr(SYCL_VISIBLE_DEVICES);
+    } else if (oneapi_device_selector_existed) {
+        device_mgr = new sycl_device_mgr(SYCL_ALL_DEVICES);
+    } else {
+        device_mgr = new sycl_device_mgr(SYCL_DEVICES_TOP_LEVEL_ZERO);
+    }
+
+    device_count = device_mgr->get_device_count();
+
+    int64_t total_vram = 0;
+
+    for (int i = 0; i < device_count; ++i) {
+        int id = get_device_id(i);
+        devices[id].vmm = 0;
+        dpct::device_info prop;
+        SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
+            prop, dpct::dev_mgr::instance().get_device(id))));
+
+        default_tensor_split[i] =
+            total_vram; // continue data, so use device index
+        total_vram += prop.get_global_mem_size();
+
+        devices[id].cc =
+            100 * prop.get_major_version() + 10 * prop.get_minor_version();
+    }
+
+    for (int i = 0; i < device_count; ++i) {
+        default_tensor_split[i] /=
+            total_vram; // continue data, so use device index
+    }
+
+    print_gpu_device_list();
+}
+
+bool ggml_sycl_device_info::is_allowed_device(int device_id) {
+    return device_mgr->is_allowed_device(device_id);
+}
+
+const char *ggml_sycl_device_info::devices_list() {
+    return device_mgr->device_list.c_str();
+}
+
+int ggml_sycl_device_info::get_device_id(int device_index) {
+    if (device_index < device_mgr->device_ids.size()) {
+        return device_mgr->device_ids.at(device_index);
+    } else {
+        std::cerr << __func__ << ":SYCL device:" << device_index
+                  << " is out of range:[" << devices_list() << "]" << std::endl;
+        std::exit(1);
+    }
+}
+
+//--ggml_sycl_device_info--