Remove unused n_parts parameter (#1509)

author Stephan Walter <redacted>
Wed, 17 May 2023 22:12:01 +0000 (22:12 +0000)
committer GitHub <redacted>
Wed, 17 May 2023 22:12:01 +0000 (22:12 +0000)
diff --git a/examples/common.cpp b/examples/common.cpp

index 259880a7cc64f8f791c30ac12b69c2b31648e0cd..a6abc4977bc1d1d93c2c785706e460ffebebddaa 100644 (file)
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -321,12 +321,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                  invalid_param = true;
                  break;
              }
-        } else if (arg == "--n-parts") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            params.n_parts = std::stoi(argv[i]);
          } else if (arg == "-h" || arg == "--help") {
              gpt_print_usage(argc, argv, default_params);
              exit(0);
@@ -418,7 +412,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
      fprintf(stderr, "  --no-penalize-nl      do not penalize newline token\n");
      fprintf(stderr, "  --memory-f32          use f32 instead of f16 for memory key+value\n");
      fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", (double)params.temp);
-    fprintf(stderr, "  --n-parts N           number of model parts (default: -1 = determine from dimensions)\n");
      fprintf(stderr, "  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
      fprintf(stderr, "  --perplexity          compute perplexity over the prompt\n");
      fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
@@ -473,7 +466,6 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
      auto lparams = llama_context_default_params();
  
      lparams.n_ctx        = params.n_ctx;
-    lparams.n_parts      = params.n_parts;
      lparams.n_gpu_layers = params.n_gpu_layers;
      lparams.seed         = params.seed;
      lparams.f16_kv       = params.memory_f16;
diff --git a/examples/common.h b/examples/common.h

index f4e07a25257238f2f8b0e95df983e11865b65950..2ad20ba504e6d812808c8c1b4f326e80c955fb08 100644 (file)
--- a/examples/common.h
+++ b/examples/common.h
@@ -24,7 +24,6 @@ struct gpt_params {
      int32_t seed          = -1;  // RNG seed
      int32_t n_threads     = get_num_physical_cores();
      int32_t n_predict     = -1;  // new tokens to predict
-    int32_t n_parts       = -1;  // amount of model parts (-1 = determine from model dimensions)
      int32_t n_ctx         = 512; // context size
      int32_t n_batch       = 512; // batch size for prompt processing (must be >=32 to use BLAS)
      int32_t n_keep        = 0;   // number of tokens to keep from initial prompt
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp

index 9a2aa7c6474fbb82971ca055525a40044e1d298b..085fdde3caf1e9cba01ab71b7b3a5bcd0fe67c68 100644 (file)
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -321,7 +321,6 @@ int main(int argc, char ** argv) {
          auto lparams = llama_context_default_params();
  
          lparams.n_ctx      = 256;
-        lparams.n_parts    = 1;
          lparams.seed       = 1;
          lparams.f16_kv     = false;
          lparams.use_mlock  = false;
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp

index 35596957974cadf942fb9f5d63eafc0c62a708a7..91f04b6c7bcb246378de121ea7351316998d0f37 100644 (file)
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -26,7 +26,6 @@ int main(int argc, char ** argv) {
      auto lparams = llama_context_default_params();
  
      lparams.n_ctx     = params.n_ctx;
-    lparams.n_parts   = params.n_parts;
      lparams.seed      = params.seed;
      lparams.f16_kv    = params.memory_f16;
      lparams.use_mmap  = params.use_mmap;
diff --git a/llama.cpp b/llama.cpp

index 98f49abd7cf4839df53bad2fa63bba98c8241a21..6e19064fc14daff6b5078250e2f8662276295b3a 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -812,7 +812,6 @@ static bool kv_cache_init(
  struct llama_context_params llama_context_default_params() {
      struct llama_context_params result = {
          /*.n_ctx                       =*/ 512,
-        /*.n_parts                     =*/ -1,
          /*.gpu_layers                  =*/ 0,
          /*.seed                        =*/ -1,
          /*.f16_kv                      =*/ false,
diff --git a/llama.h b/llama.h

index 21cba8cf61061a0c2263054762671d632444cc34..f955fa23db048ffd5dbfc5e8e03faf5dba192bfa 100644 (file)
--- a/llama.h
+++ b/llama.h
@@ -55,7 +55,6 @@ extern "C" {
  
      struct llama_context_params {
          int n_ctx;        // text context
-        int n_parts;      // -1 for default
          int n_gpu_layers; // number of layers to store in VRAM
          int seed;         // RNG seed, -1 for random
author	Stephan Walter <redacted>
	Wed, 17 May 2023 22:12:01 +0000 (22:12 +0000)
committer	GitHub <redacted>
	Wed, 17 May 2023 22:12:01 +0000 (22:12 +0000)
examples/common.cpp		patch | blob | history
examples/common.h		patch | blob | history
examples/quantize-stats/quantize-stats.cpp		patch | blob | history
examples/save-load-state/save-load-state.cpp		patch | blob | history
llama.cpp		patch | blob | history
llama.h		patch | blob | history