# Build and install the server executable from the configured source list.
add_executable(${TARGET} ${TARGET_SRCS})
install(TARGETS ${TARGET} RUNTIME)
# NOTE(review): a stray '+' diff marker was removed from this line — it made the
# file unparseable as CMake.
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
# TODO(review): prefer linking Threads::Threads (from find_package(Threads))
# over the legacy CMAKE_THREAD_LIBS_INIT variable; kept as-is here to avoid
# changing behavior without seeing where Threads is found.
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
if (LLAMA_SERVER_SSL)
return;
}
// Parse the optional OpenAI-compatible "encoding_format" request field.
// Accepted values: "base64" and "float" (the default); any other value is
// rejected with an invalid-request error before tokenization starts.
+ bool use_base64 = false;
+ if (body.count("encoding_format") != 0) {
+ const std::string& format = body.at("encoding_format");
+ if (format == "base64") {
+ use_base64 = true;
+ } else if (format != "float") {
// NOTE(review): this error string reads like the OpenAI parameter
// *description*, not an error message — confirm the intended wording
// (e.g. "unsupported encoding_format, must be float or base64").
+ res_error(res, format_error_response("The format to return the embeddings in. Can be either float or base64", ERROR_TYPE_INVALID_REQUEST));
+ return;
+ }
+ }
+
std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.ctx, prompt, true, true);
for (const auto & tokens : tokenized_prompts) {
// this check is necessary for models that do not add BOS token to the input
}
// write JSON response
// Thread the parsed use_base64 flag into the OAI-compatible formatter.
- json root = oaicompat ? format_embeddings_response_oaicompat(body, responses) : json(responses);
+ json root = oaicompat ? format_embeddings_response_oaicompat(body, responses, use_base64) : json(responses);
res_ok(res, root);
};
+import base64
+import struct
import pytest
from openai import OpenAI
from utils import *
assert res.status_code == 200
assert res.body['usage']['prompt_tokens'] == res.body['usage']['total_tokens']
assert res.body['usage']['prompt_tokens'] == 2 * 9
+
+
def test_embedding_openai_library_base64():
    """Embeddings requested with encoding_format="base64" must decode to the
    same float32 vector that the default (float) format returns.

    Relies on the module-level `server` fixture and `EPSILON` from utils.
    """
    server.start()
    test_input = "Test base64 embedding output"

    # get embedding in default (float) format as the reference vector
    res = server.make_request("POST", "/v1/embeddings", data={
        "input": test_input
    })
    assert res.status_code == 200
    vec0 = res.body["data"][0]["embedding"]

    # get the same embedding in base64 format
    res = server.make_request("POST", "/v1/embeddings", data={
        "input": test_input,
        "encoding_format": "base64"
    })

    assert res.status_code == 200
    assert "data" in res.body
    assert len(res.body["data"]) == 1

    embedding_data = res.body["data"][0]
    assert "embedding" in embedding_data
    # in base64 mode the embedding is a string, not a list of floats
    assert isinstance(embedding_data["embedding"], str)

    # verify the payload is strictly valid base64 — without validate=True,
    # b64decode silently discards non-alphabet characters
    decoded = base64.b64decode(embedding_data["embedding"], validate=True)
    # the byte count must be an exact multiple of 4 (one float32 per element);
    # otherwise struct.unpack below would silently drop a partial tail
    assert len(decoded) % 4 == 0
    float_count = len(decoded) // 4
    floats = struct.unpack(f'{float_count}f', decoded)
    # same dimensionality as the float-format reference, and non-empty
    assert len(floats) == len(vec0) > 0

    # the decoded vector must match the reference element-wise
    for x, y in zip(floats, vec0):
        assert abs(x - y) < EPSILON
#include "common.h"
#include "log.h"
#include "llama.h"
+#include "common/base64.hpp"
#ifndef NDEBUG
// crash the server in debug mode, otherwise send an http 500 error
return llama_params;
}
// Convert raw per-prompt embedding results into the OpenAI "/v1/embeddings"
// response shape. When use_base64 is true, each vector is emitted as a base64
// string of its raw float32 bytes instead of a JSON array of numbers. The
// default (false) keeps the pre-existing call sites source-compatible.
-static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
+static json format_embeddings_response_oaicompat(const json & request, const json & embeddings, bool use_base64 = false) {
json data = json::array();
int32_t n_tokens = 0;
int i = 0;
for (const auto & elem : embeddings) {
- data.push_back(json{
- {"embedding", json_value(elem, "embedding", json::array())},
- {"index", i++},
- {"object", "embedding"}
- });
+ json embedding_obj;
+
+ if (use_base64) {
// Lifetime: the const ref binds to the prvalue returned by get<>(), which
// extends its lifetime to the end of this scope, so data_ptr stays valid.
+ const auto& vec = json_value(elem, "embedding", json::array()).get<std::vector<float>>();
+ const char* data_ptr = reinterpret_cast<const char*>(vec.data());
+ size_t data_size = vec.size() * sizeof(float);
// NOTE(review): if "embedding" is missing/empty, vec.data() may be nullptr
// with data_size == 0 — confirm base64::encode tolerates that pair.
+ embedding_obj = {
+ {"embedding", base64::encode(data_ptr, data_size)},
+ {"index", i++},
+ {"object", "embedding"},
// NOTE(review): OpenAI's response schema has no per-item "encoding_format"
// field — confirm strict clients tolerate this extra key.
+ {"encoding_format", "base64"}
+ };
+ } else {
+ embedding_obj = {
+ {"embedding", json_value(elem, "embedding", json::array())},
+ {"index", i++},
+ {"object", "embedding"}
+ };
+ }
+ data.push_back(embedding_obj);
// accumulate per-prompt token counts for the usage block (built past this view)
n_tokens += json_value(elem, "tokens_evaluated", 0);
}