for (uint32_t i = 0; i < n_kv; ++i) {
assert(dinfo.ids[i] <= n_kv);
- if (dinfo.ids[i] == n_kv) {
+ if (dinfo.ids[i] == n_kv || dinfo.ids[i] == i) {
continue;
}
const auto & n_embd_head_k = hparams.n_embd_head_k;
//const auto & n_embd_head_v = hparams.n_embd_head_v;
- //GGML_ASSERT(kv_self->size == n_ctx);
-
auto inp = std::make_unique<llm_graph_input_k_shift>(this);
- inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cparams.n_ctx);
+ inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cells.size());
ggml_set_input(inp->k_shift);
for (const auto & layer : layers) {
assert(isrc < pos.size());
assert(idst < pos.size());
+ assert(pos[idst] == -1);
+ assert(pos[isrc] != -1);
+
pos [idst] = pos [isrc];
shift[idst] = shift[isrc];
seq [idst] = seq [isrc];
assert(pos[i] != -1);
seq_pos_rm(i);
+ seq[i].reset();
pos[i] = -1;
- seq[i].reset();
+ shift[i] = 0;
used.erase(i);
}
if (seq[i].none()) {
pos[i] = -1;
+ shift[i] = 0;
used.erase(i);
seq[i].reset();
pos[i] = -1;
+ shift[i] = 0;
used.erase(i);
pos[i] += d;
shift[i] += d;
- seq_pos_add(i);
-
has_shift = true;
if (pos[i] < 0) {
- seq_pos_rm(i);
-
seq[i].reset();
pos[i] = -1;
+ shift[i] = 0;
used.erase(i);
return true;
}
+ seq_pos_add(i);
+
return false;
}