// don't update the cache if the slot's context is empty
update_cache = update_cache && tokens.size() > 0;
- // TODO: mtmd does not support prompt cache
- update_cache = update_cache && (ret->mctx == nullptr);
-
if (update_cache) {
SRV_WRN("%s", "updating prompt cache\n");
return nullptr;
}
- // TODO: for some reason we can't copy server_tokens, so we have to do this workaround
auto & cur = states.emplace_back();
cur = {
- /*.tokens =*/ server_tokens(prompt.tokens.get_text_tokens(), false),
+ /*.tokens =*/ prompt.tokens.clone(),
/*.data =*/ std::move(state_data),
/*.checkpoints =*/ prompt.checkpoints,
};