From: Neo Zhang Jianyu
Date: Thu, 1 Feb 2024 19:48:53 +0000 (+0800)
Subject: add --no-mmap in llama-bench (llama/5257)
X-Git-Tag: upstream/0.0.1642~999
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=34916bd3d9aadd96ea618e7f153cbc8be520e86b;p=pkg%2Fggml%2Fsources%2Fggml

add --no-mmap in llama-bench (llama/5257)

* add --no-mmap, show sycl backend
* fix conflict
* fix code format, change print for --no-mmap
* ren no_mmap to mmap, show mmap when not default value in printer
* update guide for mmap
* mv position to reduce model reload
---

diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index 1cc55ef5..e8ba4835 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -2928,7 +2928,6 @@ void ggml_sycl_set_main_device(int main_device);
 void ggml_sycl_set_mul_mat_q(bool mul_mat_q);
 void ggml_sycl_set_scratch_size(size_t scratch_size);
 void ggml_sycl_free_scratch(void);
-int ggml_sycl_get_device_count(void);
 void ggml_sycl_get_device_description(int device, char * description, size_t description_size);
 bool ggml_backend_is_sycl(ggml_backend_t backend);
 int ggml_backend_sycl_get_device(ggml_backend_t backend);
@@ -14493,6 +14492,37 @@ bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_
     return true;
 }

+GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len) try {
+    int max_compute_units = -1;
+    for(int i=0;i
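
The added hunk above is truncated, but its opening lines (initializing `max_compute_units = -1` and looping over an output array `id_list` of length `max_len`) suggest a device-list routine that reports the GPUs with the highest compute-unit count. A minimal standalone sketch of that selection logic follows; `get_gpu_list` and the `compute_units` vector are hypothetical stand-ins for illustration, not the SYCL runtime calls the real `ggml_sycl_get_gpu_list` would make:

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical sketch: given each device's compute-unit count (in the real
// code this would come from the SYCL runtime), fill id_list (length max_len)
// with the indices of the devices that share the highest count.
// Unused slots are left as -1.
void get_gpu_list(const std::vector<int> &compute_units, int *id_list, int max_len) {
    for (int i = 0; i < max_len; i++) {
        id_list[i] = -1; // mark every slot empty first
    }

    // Find the largest compute-unit count across all devices.
    int max_compute_units = -1;
    for (int cu : compute_units) {
        max_compute_units = std::max(max_compute_units, cu);
    }

    // Record the indices of the devices that match it.
    int n = 0;
    for (std::size_t i = 0; i < compute_units.size() && n < max_len; i++) {
        if (compute_units[i] == max_compute_units) {
            id_list[n++] = (int)i;
        }
    }
}
```

For example, with per-device counts `{32, 512, 512, 16}` the routine would report devices 1 and 2 and leave the remaining slots at -1.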