// if all tokens are output, split by sequence
ubatch = balloc.split_seq(n_ubatch);
} else {
- ubatch = balloc.split_equal(n_ubatch, false);
+ // TODO: non-sequential equal split can be done if using unified KV cache
+ // for simplicity, we always use sequential equal split for now
+ ubatch = balloc.split_equal(n_ubatch, true);
}
if (ubatch.n_tokens == 0) {
// if all tokens are output, split by sequence
ubatch = balloc.split_seq(n_ubatch);
} else {
- ubatch = balloc.split_equal(n_ubatch, false);
+ // TODO: non-sequential equal split can be done if using unified KV cache
+ // for simplicity, we always use sequential equal split for now
+ ubatch = balloc.split_equal(n_ubatch, true);
}
if (ubatch.n_tokens == 0) {