git.djapps.eu Git - pkg/ggml/sources/ggml/commit
CUDA: refactor and deduplicate vector FA kernels (llama/16208)
author Johannes Gäßler <redacted>
Sat, 27 Sep 2025 16:45:07 +0000 (18:45 +0200)
committer Georgi Gerganov <redacted>
Mon, 29 Sep 2025 09:41:09 +0000 (12:41 +0300)
commit 4977596ca0efb781339a111feb1d334efada08c2
tree 46fca32956302523601017d5a61ac6eb75226111
parent 8ddb53cd70cf6f9d7c661943c4de5e206243540c
CUDA: refactor and deduplicate vector FA kernels (llama/16208)

* CUDA: refactor and deduplicate vector FA kernels
129 files changed:
src/ggml-cuda/common.cuh
src/ggml-cuda/fattn-common.cuh
src/ggml-cuda/fattn-vec-f16.cuh [deleted file]
src/ggml-cuda/fattn-vec-f32.cuh [deleted file]
src/ggml-cuda/fattn-vec.cuh [new file with mode: 0644]
src/ggml-cuda/fattn.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu [deleted file]
src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/generate_cu_files.py