* kv-cache : use ggml_set_rows
ggml-ci
* graph : separate k and v indices
ggml-ci
* cont : remove redundant ifs
ggml-ci
* kv-cache : improve find_slot impl
* kv-cache : bounds-check when accessing slot_info indices
* kv-cache : add comments
ggml-ci
* ggml : add TODOs for adding GGML_OP_SET_ROWS support in the backends
ggml-ci
return false;
}
} break;
+ case GGML_OP_SET_ROWS:
+ {
+ // TODO: add GGML_OP_SET_ROWS support to this backend
+ // ref: https://github.com/ggml-org/llama.cpp/pull/14274
+ return false;
+ } break;
case GGML_OP_CPY: {
ggml_tensor *src = op->src[0];
if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16) ||
default:
return false;
}
+ case GGML_OP_SET_ROWS:
+ {
+ // TODO: add GGML_OP_SET_ROWS support to this backend
+ // ref: https://github.com/ggml-org/llama.cpp/pull/14274
+ return false;
+ } break;
case GGML_OP_CPY:
case GGML_OP_DUP:
case GGML_OP_CONT:
return false;
}
}
+ case GGML_OP_SET_ROWS:
+ {
+ // TODO: add GGML_OP_SET_ROWS support to this backend
+ // ref: https://github.com/ggml-org/llama.cpp/pull/14274
+ return false;
+ } break;
case GGML_OP_CPY:
{
ggml_type src0_type = op->src[0]->type;
return false;
}
} break;
+ case GGML_OP_SET_ROWS:
+ {
+ // TODO: add GGML_OP_SET_ROWS support to this backend
+ // ref: https://github.com/ggml-org/llama.cpp/pull/14274
+ return false;
+ } break;
case GGML_OP_CONT:
case GGML_OP_CPY:
case GGML_OP_DUP: