]> git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commit
CUDA: quantized KV support for FA vec (llama/7527)
author Johannes Gäßler <redacted>
Sat, 1 Jun 2024 06:44:14 +0000 (08:44 +0200)
committer Georgi Gerganov <redacted>
Sun, 16 Jun 2024 15:19:48 +0000 (18:19 +0300)
commit 5582039d0a7f1454449e42e7c12c698ea4358dfb
tree c024f879dbbb41cff1d4bffad38d4488cb80f3bf
parent 9a16c643e29e3cc3178ca97e0211fdf421ad1d03
CUDA: quantized KV support for FA vec (llama/7527)

* CUDA: quantized KV support for FA vec

* try CI fix

* fix commented-out kernel variants

* add q8_0 q4_0 tests

* fix nwarps > batch size

* split fattn compile via extern templates

* fix flake8

* fix metal tests

* fix cmake

* make generate_cu_files.py executable

* add autogenerated .cu files

* fix AMD

* error if type_v != FP16 and not flash_attn

* remove obsolete code
103 files changed:
ggml-cuda.cu
ggml-cuda/fattn-common.cuh
ggml-cuda/fattn-tile-f16.cu
ggml-cuda/fattn-tile-f32.cu
ggml-cuda/fattn-vec-f16.cuh
ggml-cuda/fattn-vec-f32.cuh
ggml-cuda/fattn-wmma-f16.cuh [new file with mode: 0644]
ggml-cuda/fattn.cu
ggml-cuda/mmq.cu
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu [new file with mode: 0644]
ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu [new file with mode: 0644]
ggml-cuda/template-instances/generate_cu_files.py [new file with mode: 0755]
ggml-cuda/vecdotq.cuh
ggml-metal.m