}
json to_json_oaicompat_chat() {
- bool first = n_decoded == 0;
+ bool first = n_decoded == 1;
std::time_t t = std::time(0);
json choices;
{"delta", json{{"role", "assistant"}}}}});
} else {
// We have to send this as two updates to conform to openai behavior
+ // initial_ret carries only the assistant role (with empty content) as the first chunk
+ // when stream=True; the actual token content follows in second_ret
json initial_ret = json{{"choices", json::array({json{
{"finish_reason", nullptr},
{"index", 0},
{"delta", json{
- {"role", "assistant"}
+ {"role", "assistant"},
+ {"content", ""}
}}}})},
{"created", t},
{"id", oaicompat_cmpl_id},
{"model", oaicompat_model},
+ {"system_fingerprint", build_info},
{"object", "chat.completion.chunk"}};
json second_ret = json{
{"created", t},
{"id", oaicompat_cmpl_id},
{"model", oaicompat_model},
+ {"system_fingerprint", build_info},
{"object", "chat.completion.chunk"}};
+ if (prob_output.probs.size() > 0) {
+ second_ret["choices"][0]["logprobs"] = json{
+ {"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)},
+ };
+ }
+
+ if (timings.prompt_n >= 0) {
+ second_ret.push_back({"timings", timings.to_json()});
+ }
+
return std::vector<json>({initial_ret, second_ret});
}
} else {
})
content = ""
last_cmpl_id = None
- for data in res:
+ for i, data in enumerate(res):
choice = data["choices"][0]
+ if i == 0:
+ # Check first role message for stream=True
+ assert choice["delta"]["content"] == ""
+ assert choice["delta"]["role"] == "assistant"
+ else:
+ assert "role" not in choice["delta"]
assert data["system_fingerprint"].startswith("b")
assert "gpt-3.5" in data["model"] # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
if last_cmpl_id is None:
"stream": True,
"timings_per_token": True,
})
- for data in res:
- assert "timings" in data
- assert "prompt_per_second" in data["timings"]
- assert "predicted_per_second" in data["timings"]
- assert "predicted_n" in data["timings"]
- assert data["timings"]["predicted_n"] <= 10
+ for i, data in enumerate(res):
+ if i == 0:
+ # Check first role message for stream=True
+ assert data["choices"][0]["delta"]["content"] == ""
+ assert data["choices"][0]["delta"]["role"] == "assistant"
+ else:
+ assert "role" not in data["choices"][0]["delta"]
+ assert "timings" in data
+ assert "prompt_per_second" in data["timings"]
+ assert "predicted_per_second" in data["timings"]
+ assert "predicted_n" in data["timings"]
+ assert data["timings"]["predicted_n"] <= 10
def test_logprobs():
)
output_text = ''
aggregated_text = ''
- for data in res:
+ for i, data in enumerate(res):
choice = data.choices[0]
- if choice.finish_reason is None:
- if choice.delta.content:
- output_text += choice.delta.content
- assert choice.logprobs is not None
- assert choice.logprobs.content is not None
- for token in choice.logprobs.content:
- aggregated_text += token.token
- assert token.logprob <= 0.0
- assert token.bytes is not None
- assert token.top_logprobs is not None
- assert len(token.top_logprobs) > 0
+ if i == 0:
+ # Check first role message for stream=True
+ assert choice.delta.content == ""
+ assert choice.delta.role == "assistant"
+ else:
+ assert choice.delta.role is None
+ if choice.finish_reason is None:
+ if choice.delta.content:
+ output_text += choice.delta.content
+ assert choice.logprobs is not None
+ assert choice.logprobs.content is not None
+ for token in choice.logprobs.content:
+ aggregated_text += token.token
+ assert token.logprob <= 0.0
+ assert token.bytes is not None
+ assert token.top_logprobs is not None
+ assert len(token.top_logprobs) > 0
assert aggregated_text == output_text