git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commit
CUDA: refactor and deduplicate vector FA kernels (llama/16208)
author: Johannes Gäßler <redacted>
Sat, 27 Sep 2025 16:45:07 +0000 (18:45 +0200)
committer: Georgi Gerganov <redacted>
Mon, 29 Sep 2025 12:18:11 +0000 (15:18 +0300)
commit: e856483cd6adb0014ee6c7e7ebdb6dc6c841944b
tree: 6bd7b187f454cf7a93ea38343d371f4c36adadaf
parent: 88dd9e0d452432d819ad8fc9054f986327bd1eee
CUDA: refactor and deduplicate vector FA kernels (llama/16208)

* CUDA: refactor and deduplicate vector FA kernels
129 files changed:
ggml/src/ggml-cuda/common.cuh
ggml/src/ggml-cuda/fattn-common.cuh
ggml/src/ggml-cuda/fattn-vec-f16.cuh [deleted file]
ggml/src/ggml-cuda/fattn-vec-f32.cuh [deleted file]
ggml/src/ggml-cuda/fattn-vec.cuh [new file with mode: 0644]
ggml/src/ggml-cuda/fattn.cu
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/generate_cu_files.py