git.djapps.eu Git - pkg/ggml/sources/ggml/commit
CUDA: optimize FA for GQA + large batches (llama/12014)
author: Johannes Gäßler <redacted>
Sat, 22 Feb 2025 11:20:17 +0000 (12:20 +0100)
committer: Georgi Gerganov <redacted>
Tue, 25 Feb 2025 11:33:09 +0000 (13:33 +0200)
commit: f41f57aa499e6298413db9a242fe88cc88382e15
tree: deb4314e1f710b64b9a4f60bf8cb1633c21cd4ab
parent: 71efb7582ba4d9e5045f15514e65d35357021fe5
CUDA: optimize FA for GQA + large batches (llama/12014)
32 files changed:
src/ggml-cuda/cp-async.cuh
src/ggml-cuda/fattn-common.cuh
src/ggml-cuda/fattn-mma-f16.cuh
src/ggml-cuda/fattn-tile-f16.cu
src/ggml-cuda/fattn-tile-f32.cu
src/ggml-cuda/fattn-vec-f16.cuh
src/ggml-cuda/fattn-vec-f32.cuh
src/ggml-cuda/fattn-wmma-f16.cu
src/ggml-cuda/fattn.cu
src/ggml-cuda/mma.cuh
src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu [deleted file]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu [deleted file]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu [deleted file]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/generate_cu_files.py
tests/test-backend-ops.cpp