`image_data`: An array of objects holding base64-encoded image `data` and the `id` used to reference each image in `content`. Mark an image's position in the content like this: `Image: [img-21].\nCaption: This is a picture of a house`. In this case, `[img-21]` will be replaced by the embeddings of the image with id `21` from the `image_data` array: `{..., "image_data": [{"data": "<BASE64_STRING>", "id": 21}]}`. Use `image_data` only with multimodal models, e.g., LLaVA.
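Putting the two pieces together, a request body pairing the prompt with its image might look like the following sketch (built here with `nlohmann::json`, which the server uses internally; the `prompt` field name follows the usual completion request):

```cpp
#include <nlohmann/json.hpp>
using json = nlohmann::json;

// Sketch of a /completion request body with one referenced image:
// "[img-21]" in the prompt is replaced by the embeddings of the
// image whose id is 21 in image_data.
json body = {
    {"prompt", "Image: [img-21].\nCaption: This is a picture of a house"},
    {"image_data", json::array({
        { {"data", "<BASE64_STRING>"}, {"id", 21} }
    })}
};
```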
+`embd_normalize`: Normalization applied to pooled embeddings (default: `2`). Ignored when the model uses no pooling. Can be one of the following values (a sketch of what each mode computes follows the list):
+```
+-1: No normalization
+ 0: Max absolute (int16 range)
+ 1: Taxicab
+ 2: Euclidean/L2
+>2: P-norm
+```
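+
+For reference, a minimal sketch of what these modes compute (an illustration, not necessarily the server's exact `common_embd_normalize` implementation; the int16 scaling for mode `0` is an assumption based on the code comment below):
+
+```cpp
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+// Illustrative normalization by mode p (= embd_normalize).
+static void normalize_embedding(std::vector<float> & v, int p) {
+    double norm = 0.0;
+    if (p < 0) {
+        return;                              // -1: leave values untouched
+    } else if (p == 0) {                     //  0: scale max |x| into int16 range (assumed)
+        for (float x : v) norm = std::max(norm, (double) std::fabs(x));
+        norm /= 32767.0;
+    } else {                                 //  1: taxicab, 2: L2, >2: p-norm
+        for (float x : v) norm += std::pow(std::fabs(x), p);
+        norm = std::pow(norm, 1.0 / p);
+    }
+    const float scale = norm > 0.0 ? (float) (1.0 / norm) : 0.0f;
+    for (float & x : v) x *= scale;
+}
+```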
+
### POST `/reranking`: Rerank documents according to a given query
The API is similar to https://jina.ai/reranker/, but it might change in the future.
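A request body might look like the following sketch, assuming Jina-style fields (`query` and `documents` are assumptions here, not confirmed by this diff):

```cpp
#include <nlohmann/json.hpp>
using json = nlohmann::json;

// Sketch of a /reranking request body (field names assumed from the
// Jina-style API referenced above).
json body = {
    {"query", "What is a panda?"},
    {"documents", {
        "hi",
        "The giant panda is a bear species endemic to China."
    }}
};
```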
std::string oaicompat_cmpl_id;
common_chat_syntax oaicompat_chat_syntax;
+ // Embeddings
+ int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
+
json to_json() const {
std::vector<std::string> samplers;
samplers.reserve(sampling.samplers.size());
// normalize only when there is pooling
if (llama_pooling_type(slot.ctx) != LLAMA_POOLING_TYPE_NONE) {
- common_embd_normalize(embd, embd_res.data(), n_embd, 2);
+ common_embd_normalize(embd, embd_res.data(), n_embd, slot.params.embd_normalize);
res->embedding.push_back(embd_res);
break;
} else {
    // non-pooling path (unchanged, elided in this excerpt)
}
}
+ int embd_normalize = 2; // default to Euclidean/L2 norm
+ if (body.count("embd_normalize") != 0) {
+ embd_normalize = body.at("embd_normalize");
+ if (llama_pooling_type(ctx_server.ctx) == LLAMA_POOLING_TYPE_NONE) {
+ SRV_DBG("embd_normalize is not supported by pooling type %d, ignoring it\n", llama_pooling_type(ctx_server.ctx));
+ }
+ }
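+ // Example request body exercising this parameter (sketch; the "content"
+ // field name is an assumption, "embd_normalize" matches the key parsed
+ // above):  {"content": "What is the capital of France?", "embd_normalize": -1}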
+
// create and queue the task
json responses = json::array();
bool error = false;
// OAI-compat
task.params.oaicompat = oaicompat;
+ task.params.embd_normalize = embd_normalize;
tasks.push_back(std::move(task));
}