ggml : add ggml_cpu_has_avx_vnni() (#4589)

author automaticcat <redacted>

Sat, 30 Dec 2023 08:07:48 +0000 (15:07 +0700)

committer GitHub <redacted>

Sat, 30 Dec 2023 08:07:48 +0000 (10:07 +0200)
author automaticcat <redacted>
Sat, 30 Dec 2023 08:07:48 +0000 (15:07 +0700)
committer GitHub <redacted>
Sat, 30 Dec 2023 08:07:48 +0000 (10:07 +0200)
diff --git a/README.md b/README.md

index 48dcd6464038ebb22538c46c9a4265871048109e..ca6d14e175b099ea7b77b2b8b373e10f2aeae4c2 100644 (file)
--- a/README.md
+++ b/README.md
@@ -385,16 +385,30 @@ Building the program with BLAS support may lead to some performance improvements
  
    Check [BLIS.md](docs/BLIS.md) for more information.
  
-- #### Intel MKL
+- #### Intel oneMKL
+  - Using manual oneAPI installation:
+    By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you have already sourced the Intel environment script and passed `-DLLAMA_BLAS=ON` to cmake, the MKL version of BLAS will automatically be selected. Otherwise, please install oneAPI and follow the steps below:
+      ```bash
+      mkdir build
+      cd build
+      source /opt/intel/oneapi/setvars.sh # You can skip this step if you are using the oneapi-runtime docker image; it is only required for a manual installation
+      cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
+      cmake --build . --config Release
+      ```
  
-  By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. You may also specify it by:
+  - Using oneAPI docker image:
+    If you do not want to source the environment vars and install oneAPI manually, you can also build the code using the Intel docker container: [oneAPI-runtime](https://hub.docker.com/r/intel/oneapi-runtime)
  
-  ```bash
-  mkdir build
-  cd build
-  cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-  cmake --build . --config Release
-  ```
+      ```bash
+      mkdir build
+      cd build
+      cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
+      cmake --build . --config Release
+      ```
+
+  Building with the oneAPI compilers makes the avx_vnni instruction set available on Intel processors that do not support avx512 and avx512_vnni.
+
+  Check [Optimizing and Running LLaMA2 on Intel® CPU](https://www.intel.com/content/www/us/en/content-details/791610/optimizing-and-running-llama2-on-intel-cpu.html) for more information.
  
  - #### cuBLAS
  
diff --git a/common/common.cpp b/common/common.cpp

index b3425ab09eaf8d140fae444df549896b5e3e4ae5..eacaee18e09071d9b41facf1453fadbc0e573b23 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1394,6 +1394,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
      fprintf(stream, "build_number: %d\n",        LLAMA_BUILD_NUMBER);
      fprintf(stream, "cpu_has_arm_fma: %s\n",     ggml_cpu_has_arm_fma()     ? "true" : "false");
      fprintf(stream, "cpu_has_avx: %s\n",         ggml_cpu_has_avx()         ? "true" : "false");
+    fprintf(stream, "cpu_has_avx_vnni: %s\n",    ggml_cpu_has_avx_vnni()    ? "true" : "false");
      fprintf(stream, "cpu_has_avx2: %s\n",        ggml_cpu_has_avx2()        ? "true" : "false");
      fprintf(stream, "cpu_has_avx512: %s\n",      ggml_cpu_has_avx512()      ? "true" : "false");
      fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false");
diff --git a/ggml.c b/ggml.c

index a9e1ea9b40ec4eb7fd2d69fec1e4ef76255b85fa..bcec200f65e0401a18bd315e0cb45394b11624c3 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -19638,6 +19638,14 @@ int ggml_cpu_has_avx(void) {
  #endif
  }
  
+int ggml_cpu_has_avx_vnni(void) { // returns 1 if this build has __AVXVNNI__ defined, otherwise 0
+#if defined(__AVXVNNI__) // decided at compile time from the macro; no runtime CPU detection is done here
+    return 1;
+#else
+    return 0;
+#endif // defined(__AVXVNNI__)
+}
+
  int ggml_cpu_has_avx2(void) {
  #if defined(__AVX2__)
      return 1;
diff --git a/ggml.h b/ggml.h

index 67d6bc4f1ef1b2bbfc2ba6bf5e377f8e2c52cb6b..64f4e45e880fae113dbc5b60479b0cdcaef9b429 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -2198,6 +2198,7 @@ extern "C" {
      //
  
      GGML_API int ggml_cpu_has_avx        (void);
+    GGML_API int ggml_cpu_has_avx_vnni   (void);
      GGML_API int ggml_cpu_has_avx2       (void);
      GGML_API int ggml_cpu_has_avx512     (void);
      GGML_API int ggml_cpu_has_avx512_vbmi(void);
diff --git a/llama.cpp b/llama.cpp

index 68c7cced6bb5a289ff0aacfdf298b32cdc62d2a9..a833d4c15a9d0d6b4edadaf8c69eca32655844ad 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -10780,6 +10780,7 @@ const char * llama_print_system_info(void) {
  
      s  = "";
      s += "AVX = "         + std::to_string(ggml_cpu_has_avx())         + " | ";
+    s += "AVX_VNNI = "    + std::to_string(ggml_cpu_has_avx_vnni())    + " | ";
      s += "AVX2 = "        + std::to_string(ggml_cpu_has_avx2())        + " | ";
      s += "AVX512 = "      + std::to_string(ggml_cpu_has_avx512())      + " | ";
      s += "AVX512_VBMI = " + std::to_string(ggml_cpu_has_avx512_vbmi()) + " | ";
author	automaticcat <redacted>
	Sat, 30 Dec 2023 08:07:48 +0000 (15:07 +0700)
committer	GitHub <redacted>
	Sat, 30 Dec 2023 08:07:48 +0000 (10:07 +0200)
README.md		patch \| blob \| history
common/common.cpp		patch \| blob \| history
ggml.c		patch \| blob \| history
ggml.h		patch \| blob \| history
llama.cpp		patch \| blob \| history