// finalize the batch processing
kv_guard.commit();
+ // set to total number of outputs in the batch, for use in llama_get_logits_ith
+ n_outputs = n_outputs_all;
+
// set output mappings
{
bool sorted_output = true;
}
}
- // set to total number of outputs in the batch, for use in llama_get_logits_ith
- n_outputs = n_outputs_all;
-
// wait for the computation to finish (automatically done when obtaining the model output)
//synchronize();