lookup, lookahead: fix crash when n_ctx not specified (#18729)

author Daniele Pinna <redacted>

Fri, 30 Jan 2026 20:10:24 +0000 (21:10 +0100)

committer GitHub <redacted>

Fri, 30 Jan 2026 20:10:24 +0000 (22:10 +0200)
author Daniele Pinna <redacted>
Fri, 30 Jan 2026 20:10:24 +0000 (21:10 +0100)
committer GitHub <redacted>
Fri, 30 Jan 2026 20:10:24 +0000 (22:10 +0200)
diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp

index f54cfdd77f2f35710eafb499a3ffe64d2b61a566..aa6efa62b3b65623241786a333d5f0210c9add82 100644 (file)
--- a/examples/lookahead/lookahead.cpp
+++ b/examples/lookahead/lookahead.cpp
@@ -50,6 +50,12 @@ int main(int argc, char ** argv) {
      const int N = 5;  // n-gram size
      const int G = 15; // max verification n-grams
  
+    // lookahead requires W + G + 1 sequences for parallel Jacobi decoding
+    params.n_parallel = W + G + 1;
+
+    // unified KV cache is required for coupled sequences in batch splitting
+    params.kv_unified = true;
+
      // init llama.cpp
      llama_backend_init();
      llama_numa_init(params.numa);
@@ -115,7 +121,7 @@ int main(int argc, char ** argv) {
      // seq_id == 0           : the current input token
      // seq_id [1, W]         : tokens from the past N - 1 Jacobi iterations
      // seq_id [W + 1, W + G] : verification n-grams
-    llama_batch batch = llama_batch_init(params.n_ctx, 0, W + G + 1);
+    llama_batch batch = llama_batch_init(llama_n_ctx(ctx), 0, W + G + 1);
  
      // target model sampling context
      struct common_sampler * smpl = common_sampler_init(model, params.sampling);
diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp

index 8e73138a5f2421493f82a7ccad165dfd064c5444..c7552ddde14168fe240410939c9fb3a3da39b171 100644 (file)
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@@ -106,7 +106,7 @@ int main(int argc, char ** argv){
  
      std::vector<llama_token> draft;
  
-    llama_batch batch_tgt = llama_batch_init(params.n_ctx, 0, 1);
+    llama_batch batch_tgt = llama_batch_init(llama_n_ctx(ctx), 0, 1);
  
      const auto t_dec_start = ggml_time_us();
author	Daniele Pinna <redacted>
	Fri, 30 Jan 2026 20:10:24 +0000 (21:10 +0100)
committer	GitHub <redacted>
	Fri, 30 Jan 2026 20:10:24 +0000 (22:10 +0200)
examples/lookahead/lookahead.cpp		patch | blob | history
examples/lookup/lookup.cpp		patch | blob | history