}
bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) {
- GGML_ASSERT(seq_id >= 0 && (size_t) seq_id < seq_to_stream.size());
-
- auto & cells = v_cells[seq_to_stream[seq_id]];
- auto & head = v_heads[seq_to_stream[seq_id]];
-
- uint32_t new_head = cells.size();
+ GGML_ASSERT(seq_id == -1 || (seq_id >= 0 && (size_t) seq_id < seq_to_stream.size()));
if (p0 < 0) {
p0 = 0;
}
if (seq_id >= 0) {
+ auto & cells = v_cells[seq_to_stream[seq_id]];
+ auto & head = v_heads[seq_to_stream[seq_id]];
+
+ uint32_t new_head = cells.size();
+
for (uint32_t i = 0; i < cells.size(); ++i) {
if (!cells.pos_in(i, p0, p1)) {
continue;
}
}
}
+
+ // If we freed up a slot, set head to it so searching can start there.
+ if (new_head != cells.size() && new_head < head) {
+ head = new_head;
+ }
} else {
// match any sequence
- for (uint32_t i = 0; i < cells.size(); ++i) {
- if (!cells.pos_in(i, p0, p1)) {
- continue;
- }
+ for (uint32_t s = 0; s < n_stream; ++s) {
+ auto & cells = v_cells[s];
+ auto & head = v_heads[s];
- cells.rm(i);
+ uint32_t new_head = cells.size();
- if (new_head == cells.size()) {
- new_head = i;
+ for (uint32_t i = 0; i < cells.size(); ++i) {
+ if (!cells.pos_in(i, p0, p1)) {
+ continue;
+ }
+
+ cells.rm(i);
+
+ if (new_head == cells.size()) {
+ new_head = i;
+ }
}
- }
- }
- // If we freed up a slot, set head to it so searching can start there.
- if (new_head != cells.size() && new_head < head) {
- head = new_head;
+ // If we freed up a slot, set head to it so searching can start there.
+ if (new_head != cells.size() && new_head < head) {
+ head = new_head;
+ }
+ }
}
return true;