git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commit
CUDA: optimize FA for GQA + large batches (#12014)
author: Johannes Gäßler <redacted>
Sat, 22 Feb 2025 11:20:17 +0000 (12:20 +0100)
committer: GitHub <redacted>
Sat, 22 Feb 2025 11:20:17 +0000 (12:20 +0100)
commit: 5fa07c2f93c73161bf09ef0b23b5d2686f9a073e
tree: 81901902dd4c7630559b52003f57e27388650bbb
parent: 335eb04a91f481f37c0c9b302ee31b449b04c3e9
CUDA: optimize FA for GQA + large batches (#12014)
32 files changed:
ggml/src/ggml-cuda/cp-async.cuh
ggml/src/ggml-cuda/fattn-common.cuh
ggml/src/ggml-cuda/fattn-mma-f16.cuh
ggml/src/ggml-cuda/fattn-tile-f16.cu
ggml/src/ggml-cuda/fattn-tile-f32.cu
ggml/src/ggml-cuda/fattn-vec-f16.cuh
ggml/src/ggml-cuda/fattn-vec-f32.cuh
ggml/src/ggml-cuda/fattn-wmma-f16.cu
ggml/src/ggml-cuda/fattn.cu
ggml/src/ggml-cuda/mma.cuh
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/generate_cu_files.py
tests/test-backend-ops.cpp