}
llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch, bool cont) const {
- if (debug > 0) {
- const auto & cells = v_cells[seq_to_stream[1]];
-
- const uint32_t head_cur = v_heads[1];
- LLAMA_LOG_DEBUG("%s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n",
- __func__, cells.used_max_p1(), cells.get_used(), head_cur, get_size(), n_swa);
+ if (debug > 0) {
+ for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) {
+ const auto seq_id = ubatch.seq_id_unq[s];
+ const auto stream_id = seq_to_stream[seq_id];
+ const auto & cells = v_cells[stream_id];
+ const uint32_t head_cur = v_heads[stream_id];
+
+ LLAMA_LOG_DEBUG("%s: stream[%d], n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n",
+ __func__, stream_id, cells.used_max_p1(), cells.get_used(), head_cur, get_size(), n_swa);
+
+ if ((debug == 2 && n_swa > 0) || debug > 2) {
+ std::string ss;
+ for (uint32_t i = 0; i < cells.size(); ++i) {
+ if (cells.is_empty(i)) {
+ ss += '.';
+ } else {
+ assert(cells.seq_count(i) >= 1);
- if ((debug == 2 && n_swa > 0) || debug > 2) {
- std::string ss;
- for (uint32_t i = 0; i < cells.size(); ++i) {
- if (cells.is_empty(i)) {
- ss += '.';
- } else {
- assert(cells.seq_count(i) >= 1);
+ if (cells.seq_count(i) == 1) {
+ ss += std::to_string(cells.seq_get(i));
+ } else {
+ ss += 'M';
+ }
+ }
+ if (i%256 == 255) {
+ ss += " *";
+ ss += '\n';
+ }
+ }
+ LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
+ }
- if (cells.seq_count(i) == 1) {
- ss += std::to_string(cells.seq_get(i));
+ if ((debug == 2 && n_swa > 0) || debug > 2) {
+ std::string ss;
+ for (uint32_t i = 0; i < cells.size(); ++i) {
+ std::string cur;
+ if (cells.is_empty(i)) {
+ cur = '.';
} else {
- ss += 'M';
+ cur = std::to_string(cells.pos_get(i));
+ }
+ const int n = cur.size();
+ for (int j = 0; j < 5 - n; ++j) {
+ cur += ' ';
+ }
+ ss += cur;
+ if (i%256 == 255) {
+ ss += " *";
+ }
+ if (i%64 == 63) {
+ ss += '\n';
}
}
- if (i%256 == 255) {
- ss += " *";
- ss += '\n';
- }
+ LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
}
- LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
- }
- if ((debug == 2 && n_swa > 0) || debug > 2) {
- std::string ss;
- for (uint32_t i = 0; i < cells.size(); ++i) {
- std::string cur;
- if (cells.is_empty(i)) {
- cur = '.';
- } else {
- cur = std::to_string(cells.pos_get(i));
- }
- const int n = cur.size();
- for (int j = 0; j < 5 - n; ++j) {
- cur += ' ';
- }
- ss += cur;
- if (i%256 == 255) {
- ss += " *";
- }
- if (i%64 == 63) {
- ss += '\n';
+ for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
+ if (cells.seq_pos_min(s) < 0) {
+ continue;
}
- }
- LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
- }
- for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
- if (cells.seq_pos_min(s) < 0) {
- continue;
+ LLAMA_LOG_DEBUG("%s: stream[%d] min[%d] = %5d, max[%d] = %5d\n", __func__, stream_id, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
}
-
- LLAMA_LOG_DEBUG("%s: min[%d] = %5d, max[%d] = %5d\n", __func__, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
}
}