git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commit
CUDA: refactor and deduplicate vector FA kernels (#16208)
author: Johannes Gäßler <redacted>
Sat, 27 Sep 2025 16:45:07 +0000 (18:45 +0200)
committer: GitHub <redacted>
Sat, 27 Sep 2025 16:45:07 +0000 (18:45 +0200)
commit: 75a3a6c2cd0002ba40e2dcc92007bc9fdbc69f1a
tree: 4e14239df6c4727b655811a5296c0ab00daa2d8f
parent: 0499b29c6f64c705faaf5860dc4600fca23671f4
CUDA: refactor and deduplicate vector FA kernels (#16208)

* CUDA: refactor and deduplicate vector FA kernels
129 files changed:
ggml/src/ggml-cuda/common.cuh
ggml/src/ggml-cuda/fattn-common.cuh
ggml/src/ggml-cuda/fattn-vec-f16.cuh [deleted file]
ggml/src/ggml-cuda/fattn-vec-f32.cuh [deleted file]
ggml/src/ggml-cuda/fattn-vec.cuh [new file with mode: 0644]
ggml/src/ggml-cuda/fattn.cu
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu [deleted file]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu [new file with mode: 0644]
ggml/src/ggml-cuda/template-instances/generate_cu_files.py