} else if (!content_parts.empty()) {
if (concat_typed_text) {
std::string text;
+ bool last_was_media_marker = false;
+ // join parts with newline, do not add newline before or after media markers
for (const auto & part : content_parts) {
- if (part.type != "text") {
+ bool add_new_line = true;
+ if (part.type == "text") {
+ add_new_line = !last_was_media_marker && !text.empty();
+ last_was_media_marker = false;
+ } else if (part.type == "media_marker") {
+ add_new_line = false;
+ last_was_media_marker = true;
+ } else {
LOG_WRN("Ignoring content part type: %s\n", part.type.c_str());
continue;
}
- if (!text.empty()) {
+
+ if (add_new_line) {
text += '\n';
}
+
text += part.text;
}
jmsg["content"] = text;
throw std::invalid_argument("Missing content part type: " + part.dump());
}
const auto & type = part.at("type");
- if (type != "text") {
+ if (type != "text" && type != "media_marker") {
throw std::invalid_argument("Unsupported content part type: " + type.dump());
}
common_chat_msg_content_part msg_part;
for (const auto & msg : inputs.messages) {
auto content = msg.content;
for (const auto & part : msg.content_parts) {
- if (part.type != "text") {
+ if (part.type != "text" && part.type != "media_marker") {
LOG_WRN("Ignoring non-text content part: %s\n", part.type.c_str());
continue;
}
json image_url = json_value(p, "image_url", json::object());
handle_media(out_files, image_url, opt.media_path);
- // replace this chunk with a marker
- p["type"] = "text";
+ p["type"] = "media_marker";
p["text"] = mtmd_default_marker();
p.erase("image_url");
// TODO: add audio_url support by reusing handle_media()
- // replace this chunk with a marker
- p["type"] = "text";
+ p["type"] = "media_marker";
p["text"] = mtmd_default_marker();
p.erase("input_audio");