git.djapps.eu Git - pkg/ggml/sources/ggml/commit
support Flash Attention for fp32/fp16/Q4/Q5/Q8 (llama/20190)
author Neo Zhang <redacted>
Sun, 8 Mar 2026 04:00:07 +0000 (12:00 +0800)
committer Georgi Gerganov <redacted>
Sun, 15 Mar 2026 19:50:13 +0000 (21:50 +0200)
commit 38a8d9b60772108abb1c562625694f8590e8b84c
tree 753005b6c8c9281b38773aea3b419c83ed566112
parent 36ae4059ee87885f0e606a2ec7b1a227ad0caf85
support Flash Attention for fp32/fp16/Q4/Q5/Q8 (llama/20190)

* support flash-attention for fp32/fp16/Q4/Q5/Q8

* remove warning

* update for JIT
62 files changed:
src/ggml-sycl/CMakeLists.txt
src/ggml-sycl/backend.hpp
src/ggml-sycl/common.hpp
src/ggml-sycl/convert.cpp
src/ggml-sycl/convert.hpp
src/ggml-sycl/count-equal.cpp
src/ggml-sycl/dpct/helper.hpp
src/ggml-sycl/fattn-common.hpp [new file with mode: 0644]
src/ggml-sycl/fattn-tile.cpp [new file with mode: 0644]
src/ggml-sycl/fattn-tile.hpp [new file with mode: 0644]
src/ggml-sycl/fattn-vec.hpp [new file with mode: 0644]
src/ggml-sycl/fattn.cpp [new file with mode: 0644]
src/ggml-sycl/fattn.hpp [new file with mode: 0644]
src/ggml-sycl/ggml-sycl.cpp
src/ggml-sycl/presets.hpp
src/ggml-sycl/softmax.cpp
src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp [new file with mode: 0644]
src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp [new file with mode: 0644]
src/ggml-sycl/vecdotq.hpp