// note: tracking the other way around is not necessary for now
//seq_cpl[s0][s1] = true;
+
+ has_cpl = true;
}
}
}
return ubatch_add(idxs, idxs.size(), false);
}
-llama_ubatch llama_batch_allocr::split_equal(uint32_t n_ubatch) {
+llama_ubatch llama_batch_allocr::split_equal(uint32_t n_ubatch, bool sequential) {
+ if (sequential && has_cpl) {
+ LLAMA_LOG_ERROR("%s: sequential split is not supported when there are coupled sequences in the input batch\n", __func__);
+
+ return {};
+ }
+
std::vector<seq_set_t> cur_seq_set;
+ llama_seq_id last_seq_id = -1;
+
// determine the non-overlapping sequence sets participating in this ubatch
for (int32_t i = 0; i < batch.n_tokens; ++i) {
if (used[i]) {
}
}
+ // accept only consecutive sequence ids: the first seq id of each added set must equal last_seq_id + 1
+ if (sequential) {
+ add = add && (cur_seq_set.empty() || batch.seq_id[i][0] == last_seq_id + 1);
+ }
+
if (add) {
cur_seq_set.push_back(seq_set[i]);
+ last_seq_id = batch.seq_id[i][0];
+
if (cur_seq_set.size() > n_ubatch) {
break;
}
llama_ubatch split_simple(uint32_t n_ubatch);
// make ubatches of equal-length sequences sets
- llama_ubatch split_equal(uint32_t n_ubatch);
+ // if sequential == true, the sequence sets in the ubatch will have consecutive, increasing sequence ids
+ llama_ubatch split_equal(uint32_t n_ubatch, bool sequential);
// sequence-set-wise split - each ubatch contains a single sequence-set
llama_ubatch split_seq(uint32_t n_ubatch);
using pos_set_t = std::set<llama_pos>;
using seq_cpl_t = std::vector<bool>;
+ // helper flag to quickly determine if there are any coupled sequences in the batch
+ bool has_cpl;
+
std::vector<pos_set_t> seq_pos; // seq_pos[s]: the set of positions in sequence s
std::vector<seq_cpl_t> seq_cpl; // seq_cpl[s0][s1]: if sequence s0 is coupled to sequence s1
std::vector<llama_ubatch> ubatches;
while (true) {
- auto ubatch = balloc.split_equal(n_ubatch);
+ auto ubatch = balloc.split_equal(n_ubatch, false);
if (ubatch.n_tokens == 0) {
break;
// if all tokens are output, split by sequence
ubatch = balloc.split_seq(n_ubatch);
} else {
- ubatch = balloc.split_equal(n_ubatch);
+ ubatch = balloc.split_equal(n_ubatch, false);
}
if (ubatch.n_tokens == 0) {
// if all tokens are output, split by sequence
ubatch = balloc.split_seq(n_ubatch);
} else {
- ubatch = balloc.split_equal(n_ubatch);
+ ubatch = balloc.split_equal(n_ubatch, false);
}
if (balloc.get_n_used() < balloc.get_n_tokens()) {