git.djapps.eu Git - pkg/ggml/sources/ggml/commit
CUDA: refactor mmq, dmmv, mmvq (llama/7716)
author: Johannes Gäßler <redacted>
Wed, 5 Jun 2024 14:53:00 +0000 (16:53 +0200)
committer: Georgi Gerganov <redacted>
Sat, 15 Jun 2024 19:05:47 +0000 (22:05 +0300)
commit: 87dd7dddf53612318c3a7e86ab1f589f9b31ca0d
tree: ac374c044792de7815efcdfa2bb86d9e6eaeecd5
parent: f27d7fc0b09a1c14540dd32e66cedf71f9cdd997
CUDA: refactor mmq, dmmv, mmvq (llama/7716)

* CUDA: refactor mmq, dmmv, mmvq

* fix out-of-bounds write

* struct for qk, qr, qi

* fix cmake build

* mmq_type_traits
110 files changed:
src/ggml-common.h
src/ggml-cuda.cu
src/ggml-cuda/common.cuh
src/ggml-cuda/dmmv.cu
src/ggml-cuda/mmq.cu
src/ggml-cuda/mmq.cuh
src/ggml-cuda/mmvq.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
src/ggml-cuda/template-instances/generate_cu_files.py
src/ggml-cuda/template-instances/mmq-instance-q2_k.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/mmq-instance-q3_k.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/mmq-instance-q4_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/mmq-instance-q4_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/mmq-instance-q4_k.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/mmq-instance-q5_0.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/mmq-instance-q5_1.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/mmq-instance-q5_k.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/mmq-instance-q6_k.cu [new file with mode: 0644]
src/ggml-cuda/template-instances/mmq-instance-q8_0.cu [new file with mode: 0644]
src/ggml-cuda/vecdotq.cuh