git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commit
CUDA: optimize FA for GQA + large batches (llama/12014)
author Johannes Gäßler <redacted>
Sat, 22 Feb 2025 11:20:17 +0000 (12:20 +0100)
committer Georgi Gerganov <redacted>
Thu, 27 Feb 2025 06:55:36 +0000 (08:55 +0200)
commit 2d70cd36d7228fdfd911b247bd315b0aa2937ca9
tree 4f3f67a6fe557a1daf293d27e6e6170f8cc3b90e
parent 98dab49b9aec703ba2ac13e7e86be6122a4adbed
CUDA: optimize FA for GQA + large batches (llama/12014)
31 files changed:
ggml/src/ggml-cuda/cp-async.cuh
ggml/src/ggml-cuda/fattn-common.cuh
ggml/src/ggml-cuda/fattn-mma-f16.cuh
ggml/src/ggml-cuda/fattn-tile-f16.cu
ggml/src/ggml-cuda/fattn-tile-f32.cu
ggml/src/ggml-cuda/fattn-vec-f16.cuh
ggml/src/ggml-cuda/fattn-vec-f32.cuh
ggml/src/ggml-cuda/fattn-wmma-f16.cu
ggml/src/ggml-cuda/fattn.cu
ggml/src/ggml-cuda/mma.cuh
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/generate_cu_files.py