#pragma warning(disable: 4244 4267) // possible loss of data
#endif
-static const char * DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant";
-
static llama_context ** g_ctx;
static llama_model ** g_model;
static common_sampler ** g_smpl;
std::vector<llama_token> embd_inp;
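+// in conversation mode with a chat template and no explicit system prompt, defer
+// generation until the first user input (the template may supply its own default)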
+bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
    common_chat_msg new_msg;
    new_msg.role = role;
    new_msg.content = content;
    auto formatted = common_chat_format_single(chat_templates.get(), chat_msgs, new_msg, role == "user", g_params->use_jinja);
    chat_msgs.push_back(new_msg);
    return formatted;
};
{
-    auto prompt = (params.conversation_mode && params.enable_chat_template)
-        // format the system prompt in conversation mode (fallback to default if empty)
-        ? chat_add_and_format("system", params.system_prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.system_prompt)
+    std::string prompt;
+
+    if (params.conversation_mode && params.enable_chat_template) {
+        // format the system prompt in conversation mode (will use template default if empty)
+        prompt = params.system_prompt;
+
+        if (!prompt.empty()) {
+            prompt = chat_add_and_format("system", prompt);
+        }
+    } else {
        // otherwise use the prompt as is
-        : params.prompt;
+        prompt = params.prompt;
+    }
+
    if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
        LOG_DBG("tokenize the prompt\n");
        embd_inp = common_tokenize(ctx, prompt, true, true);
    }
// Should not run without any tokens
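+// (conversation mode can now start with an empty context and wait for the user)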
-if (embd_inp.empty()) {
+if (!params.conversation_mode && embd_inp.empty()) {
    if (add_bos) {
        embd_inp.push_back(llama_vocab_bos(vocab));
        LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
    }
// deal with end of generation tokens in interactive mode
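+// nothing has been sampled before the first user input, so there is no last token to inspect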
-if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
+if (!waiting_for_first_input && llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
    LOG_DBG("found an EOG token\n");
    if (params.interactive) {
    }
// if the current token is not EOG, add it to the current assistant message
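+// (skipped while waiting for the first user input: no assistant message is in progress yet)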
-if (params.conversation_mode) {
+if (params.conversation_mode && !waiting_for_first_input) {
    const auto id = common_sampler_last(smpl);
    assistant_ss << common_token_to_piece(ctx, id, false);
}
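+// when the first turn is deferred, ask for user input even though nothing has been decoded yet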
-if (n_past > 0 && is_interacting) {
+if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
    LOG_DBG("waiting for user input\n");
    if (params.conversation_mode) {
        input_echo = false; // do not echo this again
    }
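+// the deferred first turn ends like a normal turn: reset the sampler and return control to the user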
-if (n_past > 0) {
+if (n_past > 0 || waiting_for_first_input) {
    if (is_interacting) {
        common_sampler_reset(smpl);
    }
    is_interacting = false;
+    waiting_for_first_input = false;
}
}