struct llama_context_params llama_context_default_params() {
struct llama_context_params result = {
+ /*.seed =*/ -1,
/*.n_ctx =*/ 512,
/*.n_batch =*/ 512,
/*.n_gpu_layers =*/ 0,
/*.main_gpu =*/ 0,
/*.tensor_split =*/ {0},
+ /*.progress_callback =*/ nullptr,
+ /*.progress_callback_user_data =*/ nullptr,
/*.low_vram =*/ false,
- /*.seed =*/ -1,
/*.f16_kv =*/ true,
/*.logits_all =*/ false,
/*.vocab_only =*/ false,
/*.use_mmap =*/ true,
/*.use_mlock =*/ false,
/*.embedding =*/ false,
- /*.progress_callback =*/ nullptr,
- /*.progress_callback_user_data =*/ nullptr,
};
return result;
}

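// Editor's note (illustrative, not part of this patch): callers are expected
// to start from the defaults above and override individual fields, e.g.:
//
//     struct llama_context_params params = llama_context_default_params();
//     params.seed  = 42;   // fixed RNG seed instead of -1 (random)
//     params.n_ctx = 2048; // larger text context than the 512 default
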
typedef void (*llama_progress_callback)(float progress, void *ctx);
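// Editor's sketch (illustrative, not part of this patch): a handler matching
// the typedef above; the name print_progress and the stderr reporting are
// hypothetical, and <stdio.h> is assumed to be included.
//
//     static void print_progress(float progress, void * ctx) {
//         (void) ctx; // receives progress_callback_user_data
//         fprintf(stderr, "\rload: %3.0f%%", progress * 100.0f);
//     }
//
//     params.progress_callback           = print_progress;
//     params.progress_callback_user_data = nullptr;
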
struct llama_context_params {
+ int seed; // RNG seed, -1 for random
int n_ctx; // text context
int n_batch; // prompt processing batch size
int n_gpu_layers; // number of layers to store in VRAM
int main_gpu; // the GPU that is used for scratch and small tensors
float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
- bool low_vram; // if true, reduce VRAM usage at the cost of performance
- int seed; // RNG seed, -1 for random
+ // called with a progress value between 0 and 1, pass NULL to disable
+ llama_progress_callback progress_callback;
+ // context pointer passed to the progress callback
+ void * progress_callback_user_data;
+ // Keep the booleans together to avoid misalignment during copy-by-value.
+ bool low_vram; // if true, reduce VRAM usage at the cost of performance
bool f16_kv; // use fp16 for KV cache
bool logits_all; // the llama_eval() call computes all logits, not just the last one
bool vocab_only; // only load the vocabulary, no weights
bool use_mmap; // use mmap if possible
bool use_mlock; // force system to keep model in RAM
bool embedding; // embedding mode only
-
- // called with a progress value between 0 and 1, pass NULL to disable
- llama_progress_callback progress_callback;
- // context pointer passed to the progress callback
- void * progress_callback_user_data;
};
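// Editor's sketch (illustrative, not part of this patch): this struct crosses
// the C ABI by value, so bindings must mirror its exact layout. Grouping the
// six bools leaves no padding between them on typical ABIs where bool is one
// byte, which a build-time check could verify (requires <stddef.h>):
//
//     _Static_assert(offsetof(struct llama_context_params, f16_kv) ==
//                    offsetof(struct llama_context_params, low_vram) + 1,
//                    "booleans expected to be contiguous");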
-
// model file types
enum llama_ftype {
LLAMA_FTYPE_ALL_F32 = 0,