FROM base AS light
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
ENTRYPOINT [ "/app/llama-cli" ]
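Since the light stage keeps llama-cli as its entrypoint, the newly copied llama-completion binary has to be reached with an explicit entrypoint override. A minimal sketch of such an invocation; the image tag and model path are placeholders, not part of this diff:

    docker run --entrypoint /app/llama-completion \
        -v /path/to/models:/models ghcr.io/ggml-org/llama.cpp:light \
        -m /models/7B/ggml-model-q4_0.gguf -no-cnv \
        -p "Building a website can be done in 10 simple steps:" -n 512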
### Light, CLI only
FROM base AS light
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
### Light, CLI only
FROM base AS light
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
FROM base AS light
COPY --from=build /app/lib/ /app
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
### Light, CLI only
FROM base AS light
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
### Light, CLI only
FROM base AS light
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
-COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
+COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
exec ./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
exec ./llama-cli "$@"
+elif [[ "$arg1" == '--run-legacy' || "$arg1" == '-l' ]]; then
+ exec ./llama-completion "$@"
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
exec ./llama-bench "$@"
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
else
echo "Unknown command: $arg1"
echo "Available commands: "
- echo " --run (-r): Run a model previously converted into ggml"
- echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+ echo " --run (-r): Run a model (chat) previously converted into ggml"
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin"
+ echo " --run-legacy (-l): Run a model (legacy completion) previously converted into ggml"
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin -no-cnv -p \"Building a website can be done in 10 simple steps:\" -n 512"
echo " --bench (-b): Benchmark the performance of the inference for various parameters."
echo " ex: -m model.gguf"
echo " --perplexity (-p): Measure the perplexity of a model over a given text."
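With the new dispatch branch in place, the legacy completion flow is reachable through the same tools entrypoint as the other commands, using the arguments shown in the help text above. A minimal sketch; the image tag and model path are assumptions, not part of this diff:

    docker run -v /path/to/models:/models ghcr.io/ggml-org/llama.cpp:full \
        --run-legacy -m /models/7B/ggml-model-q4_0.bin -no-cnv \
        -p "Building a website can be done in 10 simple steps:" -n 512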
### Light, CLI only
FROM base AS light
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app