// NOTE(review): this chunk contains unresolved unified-diff markers
// ('-'/'+' line prefixes). It appears a patch that replaces the local
// alias `n_ctx` with direct `cache.size` reads was pasted here without
// being applied, so this text will NOT compile as-is. To resolve: delete
// the '-' lines and keep the '+' lines (stripping the leading '+').
//
// Purpose (visible logic): search the KV cache for a contiguous run of
// batch.n_tokens cells, starting at cache.head and wrapping to index 0
// when the tail of the cache is too short to hold the batch. Returns
// false when the batch can never fit or no slot is found.
//
// NOTE(review): the function continues past the end of this chunk — the
// success path that actually claims the slot (and the final `return
// true`) is not visible here.
static bool llama_kv_cache_find_slot(
struct llama_kv_cache & cache,
const struct llama_batch & batch) {
- const uint32_t n_ctx = cache.size;
const uint32_t n_tokens = batch.n_tokens;
// NOTE(review): the recurrent branch is empty in this view — its body
// was presumably elided from this chunk; confirm against the full file
// before assuming recurrent caches are a no-op here.
if (cache.recurrent) {
}
// otherwise, one cell per token.
// A batch larger than the entire cache can never fit, regardless of
// where cache.head currently points — fail fast with a log message.
- if (n_tokens > n_ctx) {
- LLAMA_LOG_ERROR("%s: n_tokens=%d > n_ctx=%d\n", __func__, n_tokens, n_ctx);
+ if (n_tokens > cache.size) {
+ LLAMA_LOG_ERROR("%s: n_tokens=%d > cache.size=%d\n", __func__, n_tokens, cache.size);
return false;
}
// n_tested accumulates the number of cells skipped so far; once it
// reaches the cache size we have wrapped all the way around without
// finding room (checked after the loop below).
uint32_t n_tested = 0;
while (true) {
// Not enough room between cache.head and the end of the cache:
// count the skipped tail cells and restart the search at index 0.
- if (cache.head + n_tokens > n_ctx) {
- n_tested += n_ctx - cache.head;
+ if (cache.head + n_tokens > cache.size) {
+ n_tested += cache.size - cache.head;
cache.head = 0;
continue;
}
// NOTE(review): as shown, the loop breaks as soon as the batch fits
// geometrically — the per-cell occupancy check that would normally
// sit here (advancing cache.head past occupied cells and bumping
// n_tested) is not visible in this chunk; confirm it was elided
// rather than deleted.
break;
}
// Wrapped past a full cache's worth of cells without success.
- if (n_tested >= n_ctx) {
+ if (n_tested >= cache.size) {
//LLAMA_LOG_ERROR("%s: failed to find a slot for %d tokens\n", __func__, n_tested);
return false;
}